User Inputs

# Unpack the knit parameters (`params` comes from the R Markdown YAML header)
# into top-level variables used throughout the document.
output.var = params$output.var 

# Hard-coded switch (not part of params): no absolute-value transformation.
transform.abs = FALSE
log.pred = params$log.pred    # log10-transform the output variable?
norm.pred = params$norm.pred  # bestNormalize-transform the output variable?
eda = params$eda              # run the (slow) exploratory-data-analysis chunks?
algo.forward.caret = params$algo.forward.caret    # which algorithms to run
algo.backward.caret = params$algo.backward.caret
algo.stepwise.caret = params$algo.stepwise.caret
algo.LASSO.caret = params$algo.LASSO.caret
algo.LARS.caret = params$algo.LARS.caret
message("Parameters used for training/prediction: ")
## Parameters used for training/prediction:
str(params)
## List of 9
##  $ output.var         : chr "y3"
##  $ log.pred           : logi FALSE
##  $ norm.pred          : logi FALSE
##  $ eda                : logi FALSE
##  $ algo.forward.caret : logi TRUE
##  $ algo.backward.caret: logi TRUE
##  $ algo.stepwise.caret: logi TRUE
##  $ algo.LASSO.caret   : logi TRUE
##  $ algo.LARS.caret    : logi TRUE
# Setup Labels
# Label for the (possibly transformed) output variable: append '.log' when a
# log transformation is requested, otherwise keep the original name.
# BUG FIX: the original `... else output.var.tr = output.var` is a parse error
# in R (`=`-assignment is not allowed inside an unbraced if/else branch); the
# else branch only needs to yield the value.
output.var.tr = if (log.pred == TRUE) paste0(output.var, '.log') else output.var
# Alternative transformations kept for reference:
# output.var.tr = if (log.pred == TRUE) paste0(output.var, '.cuberoot') else output.var
# output.var.tr = if (norm.pred == TRUE) paste0(output.var, '.bestnorm') else output.var

Loading Data

# Load features and labels and join them on the JobName id column.
# NOTE(review): paths are relative to the Rmd location -- confirm working dir.
feat  = read.csv('../../Data/features_highprec.csv')
labels = read.csv('../../Data/labels.csv')
# All feature columns except the id column.
predictors = names(dplyr::select(feat,-JobName))
data.ori = inner_join(feat,labels,by='JobName')
#data.ori = inner_join(feat,select_at(labels,c('JobName',output.var)),by='JobName')

Data validation

# Keep only rows with no missing values; incomplete rows are set aside so
# their count can be reported below.
cc  = complete.cases(data.ori)
data.notComplete = data.ori[! cc,]
data = data.ori[cc,] %>% select_at(c(predictors,output.var,'JobName'))
message('Original cases: ',nrow(data.ori))
## Original cases: 10000
message('Non-Complete cases: ',nrow(data.notComplete))
## Non-Complete cases: 3020
message('Complete cases: ',nrow(data))
## Complete cases: 6980
# Quick sanity check on the id column and the chosen output variable.
summary(dplyr::select_at(data,c('JobName',output.var)))
##       JobName           y3        
##  Job_00001:   1   Min.   : 95.91  
##  Job_00002:   1   1st Qu.:118.29  
##  Job_00003:   1   Median :124.03  
##  Job_00004:   1   Mean   :125.40  
##  Job_00007:   1   3rd Qu.:131.06  
##  Job_00008:   1   Max.   :193.73  
##  (Other)  :6974

Output Variable

The Output Variable y3 shows right skewness, so will proceed with a log transformation

Histogram

# Density histogram of the raw output variable
# (gather() reshapes to key/value columns for plotting).
df=gather(select_at(data,output.var))
ggplot(df, aes(x=value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() 

  #stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  

QQPlot

# Normal QQ-plot of the raw output variable.
ggplot(gather(select_at(data,output.var)), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Transformation of Output Variable from y3 to y3

# Apply the requested transformation to the output variable.
# BUG FIX: braces are required here -- `if (cond) data[[x]] = y else ...` is a
# parse error in R because `=`-assignment is not allowed directly inside an
# unbraced if/else branch.
if (log.pred == TRUE) {
  data[[output.var.tr]] = log(data[[output.var]], 10)    # log10 transform
  # data[[output.var.tr]] = (data[[output.var]])^(1/3)   # cube-root alternative
} else {
  data[[output.var.tr]] = data[[output.var]]             # identity (no transform)
}
# Compare the distribution before and after the transformation.
df=gather(select_at(data,c(output.var,output.var.tr)))
ggplot(df, aes(value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() + 
  # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
  facet_wrap(~key, scales = 'free',ncol=2)

# QQ-plots for both versions of the output variable.
ggplot(gather(select_at(data,c(output.var,output.var.tr))), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Best Normalizator y3

Normalization of y3 using the bestNormalize package (suggested: orderNorm). This is cool, but I think it goes too far for the objective of the project

# Optionally replace the transformed output with the bestNormalize transform.
if (norm.pred == TRUE){
  # Renamed from `t` to avoid shadowing base::t().
  bn = bestNormalize::bestNormalize(data[[output.var]])
  # BUG FIX: a bare `bn` inside an if-block is not auto-printed (only the
  # block's last expression is), so print() it explicitly.
  print(bn)
  qqnorm(data[[output.var]])
  # predict() with no newdata returns the transformed training values.
  qqnorm(predict(bn))
  data[[output.var.tr]] = predict(bn)
}

orderNorm() is a rank-based procedure by which the values of a vector are mapped to their percentile, which is then mapped to the same percentile of the normal distribution. Without the presence of ties, this essentially guarantees that the transformation leads to a uniform distribution

Predictors

Feature Engineering

# Feature engineering: ratio, log, and inverse-square transforms of the
# controlled "x" variables.  The loops create exactly the same columns, with
# the same values, in the same order as the original one-assignment-per-line
# version (column order matters: `predictors` later derives its order from it).
ratio.pairs = list(c('x2','x1'), c('x6','x5'), c('x9','x7'), c('x10','x8'),
                   c('x14','x12'), c('x15','x13'), c('x17','x16'),
                   c('x19','x18'), c('x21','x20'), c('x23','x22'))
for (p in ratio.pairs) {
  data[[paste0(p[1], 'by', p[2])]] = data[[p[1]]] / data[[p[2]]]
}
# Natural-log transforms (order preserved from the original, incl. x9 before
# x8, x14 before x13, and x11 last).
log.vars = c('x1','x2','x5','x6','x7','x9','x8','x10','x12','x14','x13','x15',
             'x16','x17','x18','x19','x20','x21','x22','x23','x11')
for (v in log.vars) {
  data[[paste0(v, 'log')]] = log(data[[v]])
}
# Inverse-square transforms.
sqinv.vars = c('x1','x5','x7','x8','x12','x13','x16','x18','x20','x22')
for (v in sqinv.vars) {
  data[[paste0(v, 'sqinv')]] = 1/(data[[v]])^2
}
predictors
##   [1] "x1"      "x2"      "x3"      "x4"      "x5"      "x6"      "x7"      "x8"      "x9"      "x10"     "x11"    
##  [12] "x12"     "x13"     "x14"     "x15"     "x16"     "x17"     "x18"     "x19"     "x20"     "x21"     "x22"    
##  [23] "x23"     "stat1"   "stat2"   "stat3"   "stat4"   "stat5"   "stat6"   "stat7"   "stat8"   "stat9"   "stat10" 
##  [34] "stat11"  "stat12"  "stat13"  "stat14"  "stat15"  "stat16"  "stat17"  "stat18"  "stat19"  "stat20"  "stat21" 
##  [45] "stat22"  "stat23"  "stat24"  "stat25"  "stat26"  "stat27"  "stat28"  "stat29"  "stat30"  "stat31"  "stat32" 
##  [56] "stat33"  "stat34"  "stat35"  "stat36"  "stat37"  "stat38"  "stat39"  "stat40"  "stat41"  "stat42"  "stat43" 
##  [67] "stat44"  "stat45"  "stat46"  "stat47"  "stat48"  "stat49"  "stat50"  "stat51"  "stat52"  "stat53"  "stat54" 
##  [78] "stat55"  "stat56"  "stat57"  "stat58"  "stat59"  "stat60"  "stat61"  "stat62"  "stat63"  "stat64"  "stat65" 
##  [89] "stat66"  "stat67"  "stat68"  "stat69"  "stat70"  "stat71"  "stat72"  "stat73"  "stat74"  "stat75"  "stat76" 
## [100] "stat77"  "stat78"  "stat79"  "stat80"  "stat81"  "stat82"  "stat83"  "stat84"  "stat85"  "stat86"  "stat87" 
## [111] "stat88"  "stat89"  "stat90"  "stat91"  "stat92"  "stat93"  "stat94"  "stat95"  "stat96"  "stat97"  "stat98" 
## [122] "stat99"  "stat100" "stat101" "stat102" "stat103" "stat104" "stat105" "stat106" "stat107" "stat108" "stat109"
## [133] "stat110" "stat111" "stat112" "stat113" "stat114" "stat115" "stat116" "stat117" "stat118" "stat119" "stat120"
## [144] "stat121" "stat122" "stat123" "stat124" "stat125" "stat126" "stat127" "stat128" "stat129" "stat130" "stat131"
## [155] "stat132" "stat133" "stat134" "stat135" "stat136" "stat137" "stat138" "stat139" "stat140" "stat141" "stat142"
## [166] "stat143" "stat144" "stat145" "stat146" "stat147" "stat148" "stat149" "stat150" "stat151" "stat152" "stat153"
## [177] "stat154" "stat155" "stat156" "stat157" "stat158" "stat159" "stat160" "stat161" "stat162" "stat163" "stat164"
## [188] "stat165" "stat166" "stat167" "stat168" "stat169" "stat170" "stat171" "stat172" "stat173" "stat174" "stat175"
## [199] "stat176" "stat177" "stat178" "stat179" "stat180" "stat181" "stat182" "stat183" "stat184" "stat185" "stat186"
## [210] "stat187" "stat188" "stat189" "stat190" "stat191" "stat192" "stat193" "stat194" "stat195" "stat196" "stat197"
## [221] "stat198" "stat199" "stat200" "stat201" "stat202" "stat203" "stat204" "stat205" "stat206" "stat207" "stat208"
## [232] "stat209" "stat210" "stat211" "stat212" "stat213" "stat214" "stat215" "stat216" "stat217"
# Rebuild the predictor list so it includes the engineered features: every
# column starting with "x" (raw + ratio/log/sqinv) plus the "stat" columns.
# NOTE(review): relies on no other columns matching these prefixes.
controlled.vars = colnames(data)[grep("^x",colnames(data))]
stat.vars = colnames(data)[grep("^stat",colnames(data))]

predictors = c(controlled.vars,stat.vars)
predictors
##   [1] "x1"       "x2"       "x3"       "x4"       "x5"       "x6"       "x7"       "x8"       "x9"       "x10"     
##  [11] "x11"      "x12"      "x13"      "x14"      "x15"      "x16"      "x17"      "x18"      "x19"      "x20"     
##  [21] "x21"      "x22"      "x23"      "x2byx1"   "x6byx5"   "x9byx7"   "x10byx8"  "x14byx12" "x15byx13" "x17byx16"
##  [31] "x19byx18" "x21byx20" "x23byx22" "x1log"    "x2log"    "x5log"    "x6log"    "x7log"    "x9log"    "x8log"   
##  [41] "x10log"   "x12log"   "x14log"   "x13log"   "x15log"   "x16log"   "x17log"   "x18log"   "x19log"   "x20log"  
##  [51] "x21log"   "x22log"   "x23log"   "x11log"   "x1sqinv"  "x5sqinv"  "x7sqinv"  "x8sqinv"  "x12sqinv" "x13sqinv"
##  [61] "x16sqinv" "x18sqinv" "x20sqinv" "x22sqinv" "stat1"    "stat2"    "stat3"    "stat4"    "stat5"    "stat6"   
##  [71] "stat7"    "stat8"    "stat9"    "stat10"   "stat11"   "stat12"   "stat13"   "stat14"   "stat15"   "stat16"  
##  [81] "stat17"   "stat18"   "stat19"   "stat20"   "stat21"   "stat22"   "stat23"   "stat24"   "stat25"   "stat26"  
##  [91] "stat27"   "stat28"   "stat29"   "stat30"   "stat31"   "stat32"   "stat33"   "stat34"   "stat35"   "stat36"  
## [101] "stat37"   "stat38"   "stat39"   "stat40"   "stat41"   "stat42"   "stat43"   "stat44"   "stat45"   "stat46"  
## [111] "stat47"   "stat48"   "stat49"   "stat50"   "stat51"   "stat52"   "stat53"   "stat54"   "stat55"   "stat56"  
## [121] "stat57"   "stat58"   "stat59"   "stat60"   "stat61"   "stat62"   "stat63"   "stat64"   "stat65"   "stat66"  
## [131] "stat67"   "stat68"   "stat69"   "stat70"   "stat71"   "stat72"   "stat73"   "stat74"   "stat75"   "stat76"  
## [141] "stat77"   "stat78"   "stat79"   "stat80"   "stat81"   "stat82"   "stat83"   "stat84"   "stat85"   "stat86"  
## [151] "stat87"   "stat88"   "stat89"   "stat90"   "stat91"   "stat92"   "stat93"   "stat94"   "stat95"   "stat96"  
## [161] "stat97"   "stat98"   "stat99"   "stat100"  "stat101"  "stat102"  "stat103"  "stat104"  "stat105"  "stat106" 
## [171] "stat107"  "stat108"  "stat109"  "stat110"  "stat111"  "stat112"  "stat113"  "stat114"  "stat115"  "stat116" 
## [181] "stat117"  "stat118"  "stat119"  "stat120"  "stat121"  "stat122"  "stat123"  "stat124"  "stat125"  "stat126" 
## [191] "stat127"  "stat128"  "stat129"  "stat130"  "stat131"  "stat132"  "stat133"  "stat134"  "stat135"  "stat136" 
## [201] "stat137"  "stat138"  "stat139"  "stat140"  "stat141"  "stat142"  "stat143"  "stat144"  "stat145"  "stat146" 
## [211] "stat147"  "stat148"  "stat149"  "stat150"  "stat151"  "stat152"  "stat153"  "stat154"  "stat155"  "stat156" 
## [221] "stat157"  "stat158"  "stat159"  "stat160"  "stat161"  "stat162"  "stat163"  "stat164"  "stat165"  "stat166" 
## [231] "stat167"  "stat168"  "stat169"  "stat170"  "stat171"  "stat172"  "stat173"  "stat174"  "stat175"  "stat176" 
## [241] "stat177"  "stat178"  "stat179"  "stat180"  "stat181"  "stat182"  "stat183"  "stat184"  "stat185"  "stat186" 
## [251] "stat187"  "stat188"  "stat189"  "stat190"  "stat191"  "stat192"  "stat193"  "stat194"  "stat195"  "stat196" 
## [261] "stat197"  "stat198"  "stat199"  "stat200"  "stat201"  "stat202"  "stat203"  "stat204"  "stat205"  "stat206" 
## [271] "stat207"  "stat208"  "stat209"  "stat210"  "stat211"  "stat212"  "stat213"  "stat214"  "stat215"  "stat216" 
## [281] "stat217"

All predictors show a fat-tailed distribution: the two tails are very tall, with little density around the mean. The orderNorm transformation can help (see [Best Normalizator] section)

Interesting Predictors

Histograms

# Histograms + summaries for a handful of interesting predictors.
if (eda == TRUE){
  cols = c('x11','x18','stat98','x7','stat110')
  df=gather(select_at(data,cols))
  # BUG FIX: print() is required -- inside an if-block only the value of the
  # *last* expression auto-prints, so this ggplot was never rendered.
  print(
    ggplot(df, aes(value)) +
      geom_histogram(aes(y=..density..),bins = 50,fill='light blue') +
      geom_density() +
      # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
      facet_wrap(~key, scales = 'free',ncol=3)
  )
  
  # ggplot(gather(select_at(data,cols)), aes(sample=value)) + 
  #   stat_qq()+
  #   facet_wrap(~key, scales = 'free',ncol=2)
  
  # Per-column summaries (last expression of the block, so it auto-prints).
  lapply(select_at(data,cols),summary)
}

Scatter plot vs. output variable **y3**

# Scatter plots of the selected predictors vs. the (transformed) output.
# NOTE(review): `cols` is defined in the previous EDA chunk, so this chunk
# only works when that one has run (i.e. eda == TRUE throughout).
if (eda == TRUE){
  d = gather(dplyr::select_at(data,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light green',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=3)
}

All Predictors

Histograms

All predictors show strong indications of fat tails

# Histograms for every predictor (large facet grid; the ggplot is the last
# expression of the if-block, so it auto-prints).
if (eda == TRUE){
  df=gather(select_at(data,predictors))
  ggplot(df, aes(value)) + 
    geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
    geom_density() + 
    # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
    facet_wrap(~key, scales = 'free',ncol=4)
}

Correlations

With Output Variable

# Correlation of every numeric column with the (transformed) output variable,
# reported as the 20 most positive and 20 most negative.
if (eda == TRUE){
  #chart.Correlation(select(data,-JobName),  pch=21)
  # https://stackoverflow.com/questions/27034655/how-to-use-dplyrarrangedesc-when-using-a-string-as-column-name
  t=as.data.frame(round(cor(dplyr::select(data,-one_of(output.var.tr,'JobName'))
                            ,select_at(data,output.var.tr)),4))  %>%
    rownames_to_column(var='variable') %>% filter(variable != !!output.var) %>% arrange(-!!sym(output.var.tr))
  #DT::datatable(t)
  message("Top Positive")
  # BUG FIX: print() is required -- only the last expression of an if-block
  # auto-prints, so the first kable() table was never shown.
  print(kable(head(arrange(t,desc(!!sym(output.var.tr))),20)))
  message("Top Negative")
  print(kable(head(arrange(t,!!sym(output.var.tr)),20)))
}

Between All Variables

# Pairwise correlation matrix of all numeric columns (id column dropped).
if (eda == TRUE){
  #chart.Correlation(select(data,-JobName),  pch=21)
  t=as.data.frame(round(cor(dplyr::select(data,-one_of('JobName'))),4))
  #DT::datatable(t,options=list(scrollX=T))
  message("Showing only 10 variables")
  # kable() is the last expression of the block, so it auto-prints.
  kable(t[1:10,1:10])
}

Scatter Plots with Output Variable

Scatter plots with all predictors and the output variable (y3)

# Scatter plots of every predictor vs. the (transformed) output variable.
if (eda == TRUE){
  d = gather(dplyr::select_at(data,c(predictors,output.var.tr)),key=target,value=value,-!!output.var.tr)
  ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
    geom_point(color='light blue',alpha=0.5) + 
    geom_smooth() + 
    facet_wrap(~target, scales = 'free',ncol=4)
}

Multicollinearity - VIF

No Multicollinearity among predictors

Showing Top predictor by VIF Value

# Variance-inflation factors for all predictors, largest first.
if (eda == TRUE){
  vifDF = usdm::vif(select_at(data,predictors)) %>% arrange(desc(VIF))
  # head() is the last expression of the block, so it auto-prints.
  head(vifDF,75)
}

Feature Eng

  • Square Root transformation for x18
# Add a square-root transform of x18 (chosen from the EDA above); keep both
# the original and the transformed column for comparison.
data.tr=data %>%
  mutate(x18.sqrt = sqrt(x18)) 
cols=c('x18','x18.sqrt')

Comparing Pre and Post Transformation Density Plots

# Density comparison pre/post transformation (kept for reference):
# ggplot(gather(select_at(data.tr,cols)), aes(value)) + 
#   geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
#   geom_density() + 
#   facet_wrap(~key, scales = 'free',ncol=4)

# Scatter of x18 and x18.sqrt against the (transformed) output variable.
d = gather(dplyr::select_at(data.tr,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
  geom_point(color='light blue',alpha=0.5) + 
  geom_smooth() + 
  facet_wrap(~target, scales = 'free',ncol=4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Remove unwanted variables (only the id column; predictors and output stay).
data.tr=data.tr %>%
  #dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('x18','y3','JobName')])
  dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('JobName')])

# From here on `data` is the fully engineered dataset and `label.names` the
# (transformed) output column name.
data=data.tr
label.names=output.var.tr

Modeling

PCA

# PCA interaction mode:
# 0 for no interaction, 
# 1 for Full 2 way interaction and 
# 2 for Selective 2 way interaction
# 3 for Selective 3 way interaction
InteractionMode = 2

# All columns except the output label(s) enter the PCA.
pca.vars  = names(data)
pca.vars = pca.vars[!pca.vars %in% label.names]


# http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
# BUG FIX: makeCluster() needs a whole number of workers; detectCores()*0.75
# can be fractional, so round down (but keep at least one worker).
cl <- makeCluster(max(1L, floor(detectCores() * 0.75))) # use ~75% of cores only, leave rest for other tasks
registerDoParallel(cl)

# Fit the PCA according to InteractionMode.
# NOTE(review): prcomp() itself is single-threaded; the doParallel cluster
# registered above has no effect on these calls -- confirm it is needed here.
if(InteractionMode == 1){
  # Full 2-way interaction among all predictors.
  pca.formula =as.formula(paste0('~(',paste0(pca.vars, collapse ='+'),')^2'))
  pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=TRUE,scale.=TRUE,retx = TRUE)
  #saveRDS(pca.model,'pca.model.rds')
}
if (InteractionMode == 0){
  # No interaction terms: PCA on the raw predictor matrix.
  pca.model =  prcomp(x=data[,pca.vars],center=TRUE,scale.=TRUE,retx = TRUE)
}
# Scalar condition: use && (elementwise & is an idiom error inside if()).
if (InteractionMode >= 2 && InteractionMode <= 3){
  controlled.vars = pca.vars[grep("^x",pca.vars)]
  stat.vars = pca.vars[grep("^stat",pca.vars)]
  
  # 2-way interactions among the controlled "x" variables only...
  if (InteractionMode >= 2){
    interaction.form = paste0('~(',paste0(controlled.vars, collapse ='+'),')^2')
  }
  # ...upgraded to 3-way when InteractionMode == 3 (overwrites the ^2 form).
  if (InteractionMode >= 3){
    interaction.form = paste0('~(',paste0(controlled.vars, collapse ='+'),')^3')
  }
  # The "stat" variables enter without interactions.
  no.interact.form = paste0(stat.vars, collapse ='+')
  
  pca.formula = as.formula(paste(interaction.form, no.interact.form, sep = "+"))
  pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=TRUE,scale.=TRUE,retx = TRUE)
}

stopCluster(cl)
registerDoSEQ() # register sequential engine in case you are not using this function anymore
# Fraction of total variance the retained principal components should cover.
targetCumVar = .9

pca.model$var = pca.model$sdev ^ 2 #eigenvalues
pca.model$pvar = pca.model$var / sum(pca.model$var)  # proportion of variance per PC
pca.model$cumpvar = cumsum(pca.model$pvar )          # cumulative proportion
# NOTE(review): `<=` keeps only components strictly below the target, so the
# selected set can explain slightly *less* than targetCumVar -- confirm intended.
pca.model$pcaSel = pca.model$cumpvar<=targetCumVar
pca.model$pcaSelCount = sum(pca.model$pcaSel)
pca.model$pcaSelTotVar = sum(pca.model$pvar[pca.model$pcaSel])
message(pca.model$pcaSelCount, " PCAs justify ",percent(targetCumVar)," of the total Variance. (",percent(pca.model$pcaSelTotVar),")")
## 164 PCAs justify 90.0% of the total Variance. (90.0%)
# Scree and cumulative-variance plots.
plot(pca.model$var,xlab="Principal component", ylab="Proportion of variance explained",   type='b')

plot(cumsum(pca.model$pvar ),xlab="Principal component", ylab="Cumulative Proportion of variance explained", ylim=c(0,1), type='b')

screeplot(pca.model,npcs = pca.model$pcaSelCount)

screeplot(pca.model,npcs = pca.model$pcaSelCount,type='lines')

#summary(pca.model)
#pca.model$rotation
# Build the modelling dataset: the (transformed) output column bound to the
# scores of the principal components selected above.
data.pca = dplyr::select(data,!!label.names) %>% 
  dplyr::bind_cols(dplyr::select(as.data.frame(pca.model$x)
                                 ,!!colnames(pca.model$rotation)[pca.model$pcaSel])
  )

Train Test Split

# Shuffle, then 80/20 train/test split.
# NOTE(review): no set.seed() before these calls, so the split is not
# reproducible across runs -- confirm that is acceptable.
# NOTE(review): caTools::sample.split stratifies on categorical labels; with a
# continuous output it still yields an 80/20 split -- verify behavior.
data.pca = data.pca[sample(nrow(data.pca)),] # randomly shuffle data
split = sample.split(data.pca[,label.names], SplitRatio = 0.8)

data.train = subset(data.pca, split == TRUE)
data.test = subset(data.pca, split == FALSE)

Common Functions

# Regression diagnostics: base plot() diagnostics, studentized/standardized
# residual plots, residual histogram, leverage and Cook's distance plots.
# NOTE(review): the dot in the name makes this look like an S3 `plot` method
# for class "diagnostics"; renaming would break callers, so it is kept.
#
# @param model a fitted lm-style model.
# @param train the data the model was fitted on (used for predict()).
# @return the vector of Cook's distances.
plot.diagnostics <-  function(model, train) {
  plot(model)  # standard base-R diagnostic plots
  
  r.standard = rstandard(model)  # internally studentized (standardized) residuals
  r.student = rstudent(model)    # externally studentized residuals
  
  # Externally studentized residuals vs. fitted values.
  df = data.frame(x=predict(model,train),y=r.student)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = 0,size=1)+
    ylab("Student Residuals") +
    xlab("Predicted Values")+
    ggtitle("Student Residual Plot")
  plot(p)
  
  # Standardized residuals vs. fitted values, with +/-2 bands flagging
  # potential outliers.  BUG FIX: this plot uses rstandard(), but its labels
  # were copy-pasted from the studentized plot above.
  df = data.frame(x=predict(model,train),y=r.standard)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = c(-2,0,2),size=1)+
    ylab("Standardized Residuals") +
    xlab("Predicted Values")+
    ggtitle("Standardized Residual Plot")
  plot(p)
  # Histogram of studentized residuals vs. the standard normal density.
  df=data.frame(r.student)
  p=ggplot(data=df,aes(r.student)) +
    geom_histogram(aes(y=..density..),bins = 50,fill='blue',alpha=0.6) + 
    stat_function(fun = dnorm, n = 100, args = list(mean = 0, sd = 1)) +
    ylab("Density")+
    xlab("Studentized Residuals")+
    ggtitle("Distribution of Studentized Residuals")
  plot(p)
  # http://www.stat.columbia.edu/~martin/W2024/R7.pdf
  # Influence measures are computed; their (very large) summary stays unprinted.
  inf.meas = influence.measures(model)
  # print (summary(inf.meas)) # too much data
  
  # Leverage plot
  lev = hat(model.matrix(model))
  df=tibble::rownames_to_column(as.data.frame(lev),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=lev)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    ylab('Leverage - check') + 
    xlab('Index')
  plot(p)
  # Cook's Distance: reference line at the usual 4/n cutoff.  Only points above
  # 15/n are labelled to keep the plot readable -- TODO confirm 15/n is
  # intended rather than 4/n.
  cd = cooks.distance(model)
  df=tibble::rownames_to_column(as.data.frame(cd),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=cd)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_text(data=filter(df,cd>15/nrow(train)),aes(label=id),check_overlap=T,size=3,vjust=-.5)+
    ylab('Cooks distances') + 
    geom_hline(yintercept = c(4/nrow(train),0),size=1)+
    xlab('Index')
  plot(p)
  print (paste("Number of data points that have Cook's D > 4/n: ", length(cd[cd > 4/nrow(train)]), sep = "")) 
  print (paste("Number of data points that have Cook's D > 1: ", length(cd[cd > 1]), sep = "")) 
  return(cd)
}

# function to set up random seeds
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html 
# Build the `seeds` list that caret::trainControl expects, so resampling is
# reproducible.
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html 
#
# @param method  resampling method: "cv" or "repeatedcv" (anything else -> NULL).
# @param numbers number of CV folds.
# @param repeats number of repeats (repeatedcv only).
# @param tunes   tuning-grid length, added to each per-resample seed vector.
# @param seed    master seed used to generate all the seed vectors.
# @return a list of length B + 1 (one integer vector per resample plus one
#   final-model seed), or NULL when `method` is not recognised.
setCaretSeeds <- function(method = "cv", numbers = 1, repeats = 1, tunes = NULL, seed = 1701) {
  # B is the number of resamples; each element is an integer vector of length
  # numbers (+ tune length if any).
  B <- if (method == "cv") numbers
  else if(method == "repeatedcv") numbers * repeats
  else NULL
  # BUG FIX: the original tested is.null(length) -- `length` is a base
  # function and never NULL, so unknown methods crashed in vector() instead of
  # returning NULL.
  if(is.null(B)) {
    seeds <- NULL
  } else {
    set.seed(seed = seed)
    seeds <- vector(mode = "list", length = B)
    seeds <- lapply(seeds, function(x) sample.int(n = 1000000
                                                  , size = numbers + ifelse(is.null(tunes), 0, tunes)))
    # One extra single seed for fitting the final model.
    seeds[[length(seeds) + 1]] <- sample.int(n = 1000000, size = 1)
  }
  # return seeds
  seeds
}



# Train one of several linear-model / feature-selection methods through caret
# and print diagnostics (CV metric plots, residual plots, selected
# coefficients).  Supported: 'leapForward' / 'leapBackward' / 'leapSeq'
# (best subset via leaps), 'glmnet' with subopt = 'LASSO', and 'lars'.
#
# @param formula        full model formula; the methods select a subset from it.
# @param data           training data frame.
# @param method         caret method name (see above).
# @param subopt         qualifier for `method`; only 'LASSO' is recognised.
# @param feature.names  predictor names (sizes the default nvmax grid).
# @param train.control  optional trainControl; defaults to seeded 10-fold CV.
# @param tune.grid      optional tuning grid; method-specific default if NULL.
# @param pre.proc       optional preProcess spec; forced to center/scale for lars.
# @return a list with the model, plots and method-specific extras, or NULL
#   (invisibly) for an unrecognised method.
train.caret.glmselect = function(formula, data, method
                                 ,subopt = NULL, feature.names
                                 , train.control = NULL, tune.grid = NULL, pre.proc = NULL){
  
  # The three leaps-backed methods share identical handling throughout.
  leap.methods = c('leapForward', 'leapBackward', 'leapSeq')
  # BUG FIX: identical() is NULL-safe.  With the default subopt = NULL the
  # original `subopt == 'LASSO'` yields logical(0), which breaks `&&`.
  is.lasso = method == 'glmnet' && identical(subopt, 'LASSO')
  
  if(is.null(train.control)){
    # Default: reproducible (seeded) 10-fold cross-validation, grid search.
    train.control <- trainControl(method = "cv"
                              ,number = 10
                              ,seeds = setCaretSeeds(method = "cv"
                                                     , numbers = 10
                                                     , seed = 1701)
                              ,search = "grid"
                              ,verboseIter = TRUE
                              ,allowParallel = TRUE
                              )
  }
  
  if(is.null(tune.grid)){
    if (method %in% leap.methods){
      # Try every subset size from 1 up to the number of candidate features.
      tune.grid = data.frame(nvmax = seq_along(feature.names))
    }
    if (is.lasso){
      # Will only show 1 Lambda value during training, but that is OK
      # https://stackoverflow.com/questions/47526544/why-need-to-tune-lambda-with-carettrain-method-glmnet-and-cv-glmnet
      # Another option for LASSO is this: https://github.com/topepo/caret/blob/master/RegressionTests/Code/lasso.R
      lambda = 10^seq(-2,0, length =100)
      alpha = c(1)  # alpha = 1 => pure LASSO penalty in glmnet
      tune.grid = expand.grid(alpha = alpha,lambda = lambda)
    }
    if (method == 'lars'){
      # https://github.com/topepo/caret/blob/master/RegressionTests/Code/lars.R
      fraction = seq(0, 1, length = 100)
      tune.grid = expand.grid(fraction = fraction)
      pre.proc = c("center", "scale") 
    }
  }
  
  # http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
  # BUG FIX: makeCluster() needs a whole number of workers; round down, >= 1.
  cl <- makeCluster(max(1L, floor(detectCores() * 0.75))) # use ~75% of cores only, leave rest for other tasks
  registerDoParallel(cl)

  set.seed(1) 
  # note that the seed has to actually be set just before train() is called;
  # setting it earlier does not ensure reproducibility for some reason
  model.caret <- caret::train(formula
                              , data = data
                              , method = method
                              , tuneGrid = tune.grid
                              , trControl = train.control
                              , preProc = pre.proc
                              )
  
  stopCluster(cl)
  registerDoSEQ() # register sequential engine in case you are not using this function anymore
  
  if (method %in% leap.methods){
    print("All models results")
    print(model.caret$results) # all model results
    print("Best Model")
    print(model.caret$bestTune) # best model
    model = model.caret$finalModel

    # Metrics Plot: MAE / RMSE / R^2 as a function of subset size (nvmax).
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-nvmax) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=nvmax,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot
    # leap function does not support studentized residuals
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
   
    # Residual histogram vs. a matching normal curve.  BUG FIX: a '+' was
    # missing before theme_light(), leaving it as a dead standalone expression.
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') + 
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    id = rownames(model.caret$bestTune)    
    # regsubsets does not return a full model object (see its documentation),
    # so refit the selected subset with lm() to get coefficient intervals.
    # https://stackoverflow.com/questions/13063762/how-to-obtain-a-lm-object-from-regsubsets
    print("Coefficients of final model:")
    coefs <- coef(model, id=id)
    nams <- names(coefs)
    nams <- nams[!nams %in% "(Intercept)"]
    response <-  as.character(formula[[2]])
    form <- as.formula(paste(response, paste(nams, collapse = " + "), sep = " ~ "))
    mod <- lm(form, data = data)
    print(car::Confint(mod))
    return(list(model = model,id = id, residPlot = residPlot, residHistogram=residHistogram
                ,metricsPlot = metricsPlot  # added for consistency with other branches
                ,modelLM=mod))
  }
  if (is.lasso){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    print(model.caret$results)
    model=model.caret$finalModel
    # Metrics Plot: metrics as a function of lambda.
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-lambda) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=lambda,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot 
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)

    # BUG FIX: '+' was missing before theme_light() (dead expression).
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    
    print("Coefficients") 
    #no interval for glmnet: https://stackoverflow.com/questions/39750965/confidence-intervals-for-ridge-regression
    t=coef(model,s=model.caret$bestTune$lambda)
    model.coef = t[which(t[,1]!=0),]  # keep only the non-zero (selected) coefficients
    print(as.data.frame(model.coef))
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = residPlot, metricsPlot=metricsPlot
                ,residHistogram=residHistogram))  # added for consistency
  }
  if (method == 'lars'){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    # Metrics Plot: metrics as a function of the L1 fraction.
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-fraction) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=fraction,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)

    # BUG FIX: '+' was missing before theme_light() (dead expression).
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') + 
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4') +
      theme_light()
    plot(residHistogram)
    
    print("Coefficients") 
    t=coef(model.caret$finalModel,s=model.caret$bestTune$fraction,mode='fraction')
    model.coef = t[which(t!=0)]  # non-zero (selected) coefficients only
    print(model.coef)
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = residPlot, residHistogram=residHistogram
                ,metricsPlot = metricsPlot))  # added for consistency
  }
}

# https://stackoverflow.com/questions/48265743/linear-model-subset-selection-goodness-of-fit-with-k-fold-cross-validation
# Predict from a regsubsets fit: expand `newdata` through the supplied formula
# and multiply by the coefficients of the size-`id` model.  The formula is
# passed explicitly because object$call[[2]] does not reliably recover it.
predict.regsubsets <- function(object, newdata, id, formula, ...) {
    # Design matrix for the new data (adds intercept, expands interactions).
    design <- model.matrix(formula, newdata)
    beta <- coef(object, id = id)
    # Keep only the columns belonging to the selected subset, in coef order.
    selected <- design[, names(beta), drop = FALSE]
    selected %*% beta
}
  
# Evaluate a fitted model on the test set: predictions, MSE/RMSE on the
# transformed and on the original scale, and an actual-vs-predicted plot.
#
# NOTE(review): relies on the globals `log.pred` and `norm.pred` defined at
# the top of the document -- consider passing them as arguments.
#
# @param model          fitted model (or caret object, depending on `method`).
# @param test           test data frame.
# @param level          confidence level for predict() when method is NULL.
# @param draw.limits,good,ok  slopes for the good/ok tolerance bands.
# @param method,subopt  how the model was trained; selects the predict call.
# @param id             best-subset id (leap methods only).
# @param formula        full formula (leap methods only).
# @param feature.names  predictor names (glmnet predicts from a plain matrix).
# @param label.names    name of the output column in `test`.
# @param transformation bestNormalize object for the inverse transform.
# @return the actual-vs-predicted ggplot (value of the last expression).
test.model = function(model, test, level=0.95
                      ,draw.limits = FALSE, good = 0.1, ok = 0.15
                      ,method = NULL, subopt = NULL
                      ,id = NULL, formula, feature.names, label.names
                      ,transformation = NULL){
  ## if using caret for glm select equivalent functionality, 
  ## need to pass formula (full is ok as it will select subset of variables from there)
  # BUG FIX: this must be a single if/else-if chain.  The original fell
  # through to `method == 'leapForward'` even when method was NULL, which
  # errors with "argument is of length zero".
  if (is.null(method)){
    pred = predict(model, newdata=test, interval="confidence", level = level) 
  } else if (method %in% c('leapForward', 'leapBackward', 'leapSeq')){
    pred = predict.regsubsets(model, newdata = test, id = id, formula = formula)
  } else if (method == 'glmnet' && identical(subopt, 'LASSO')){
    # identical() is NULL-safe, unlike `subopt == 'LASSO'` with subopt = NULL.
    xtest = as.matrix(test[,feature.names])  # glmnet predicts from a matrix
    pred=as.data.frame(predict(model, xtest))
  } else if (method == 'lars'){
    pred=as.data.frame(predict(model, newdata = test))
  } else {
    stop("test.model: unsupported method '", method, "'", call. = FALSE)
  }
    
  # Summary of predicted values
  print ("Summary of predicted values: ")
  print(summary(pred[,1]))

  test.mse = mean((test[,label.names]-pred[,1])^2)
  print (paste(method, subopt, "Test MSE:", test.mse, sep=" "))
  
  test.rmse = sqrt(test.mse)
  print (paste(method, subopt, "Test RMSE:", test.rmse, sep=" "))
  
  if(log.pred == TRUE || norm.pred == TRUE){
    # Plot the transformed-scale comparison first.  BUG FIX: print() is
    # required -- a ggplot that is not the function's return value is never
    # rendered.
    df=data.frame(x=test[,label.names],y=pred[,1])
    print(
      ggplot(df,aes(x=x,y=y)) +
        geom_point(color='blue',alpha=0.5,shape=20,size=2) +
        geom_abline(slope=1,intercept=0,color='black',size=1) +
        #scale_y_continuous(limits=c(min(df),max(df)))+
        xlab("Actual (Transformed)")+
        ylab("Predicted (Transformed)")
    )
  }
    
  # Back-transform actual/predicted to the original scale.
  if (log.pred == FALSE && norm.pred == FALSE){
    x = test[,label.names]
    y = pred[,1]
  }
  if (log.pred == TRUE){
    x = 10^test[,label.names]  # inverse of the log10 transform
    y = 10^pred[,1]
    # x = (test[,label.names])^3
    # y = (pred[,1])^3
  }
  if (norm.pred == TRUE){
    x = predict(transformation, test[,label.names], inverse = TRUE)
    y = predict(transformation, pred[,1], inverse = TRUE)
  }

  test.mse = mean((x-y)^2)
  print (paste(method, subopt, "Test MSE (Org Scale):", test.mse, sep=" "))
  
  test.rmse = sqrt(test.mse)
  print (paste(method, subopt, "Test RMSE (Org Scale):", test.rmse, sep=" "))

  # Actual vs. predicted with +/-`good` (green) and +/-`ok` (red) tolerance
  # bands; this ggplot is the return value and prints at top level.
  df=data.frame(x,y)
  ggplot(df,aes(x,y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_abline(slope=c(1+good,1-good,1+ok,1-ok)
                ,intercept=rep(0,4),color=c('dark green','dark green','dark red','dark red'),size=1,alpha=0.8) +
    #scale_y_continuous(limits=c(min(df),max(df)))+
    xlab("Actual")+
    ylab("Predicted")
}

Setup Formulae

# Column names of the training frame (labels + features, set in earlier chunks)
n <- names(data.train)

# Full model: response column(s) regressed on every non-label column
formula <- as.formula(
  paste(paste(n[n %in% label.names], collapse = " + "),
        "~",
        paste(n[!n %in% label.names], collapse = " + "))
)

# Intercept-only baseline: response ~ 1 (grand mean)
grand.mean.formula <- as.formula(
  paste(paste(n[n %in% label.names], collapse = " + "), "~ 1")
)

print(formula)
## y3 ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10 + 
##     PC11 + PC12 + PC13 + PC14 + PC15 + PC16 + PC17 + PC18 + PC19 + 
##     PC20 + PC21 + PC22 + PC23 + PC24 + PC25 + PC26 + PC27 + PC28 + 
##     PC29 + PC30 + PC31 + PC32 + PC33 + PC34 + PC35 + PC36 + PC37 + 
##     PC38 + PC39 + PC40 + PC41 + PC42 + PC43 + PC44 + PC45 + PC46 + 
##     PC47 + PC48 + PC49 + PC50 + PC51 + PC52 + PC53 + PC54 + PC55 + 
##     PC56 + PC57 + PC58 + PC59 + PC60 + PC61 + PC62 + PC63 + PC64 + 
##     PC65 + PC66 + PC67 + PC68 + PC69 + PC70 + PC71 + PC72 + PC73 + 
##     PC74 + PC75 + PC76 + PC77 + PC78 + PC79 + PC80 + PC81 + PC82 + 
##     PC83 + PC84 + PC85 + PC86 + PC87 + PC88 + PC89 + PC90 + PC91 + 
##     PC92 + PC93 + PC94 + PC95 + PC96 + PC97 + PC98 + PC99 + PC100 + 
##     PC101 + PC102 + PC103 + PC104 + PC105 + PC106 + PC107 + PC108 + 
##     PC109 + PC110 + PC111 + PC112 + PC113 + PC114 + PC115 + PC116 + 
##     PC117 + PC118 + PC119 + PC120 + PC121 + PC122 + PC123 + PC124 + 
##     PC125 + PC126 + PC127 + PC128 + PC129 + PC130 + PC131 + PC132 + 
##     PC133 + PC134 + PC135 + PC136 + PC137 + PC138 + PC139 + PC140 + 
##     PC141 + PC142 + PC143 + PC144 + PC145 + PC146 + PC147 + PC148 + 
##     PC149 + PC150 + PC151 + PC152 + PC153 + PC154 + PC155 + PC156 + 
##     PC157 + PC158 + PC159 + PC160 + PC161 + PC162 + PC163 + PC164
print(grand.mean.formula)
## y3 ~ 1
# Update feature.names because we may have transformed some features
# (recomputed as the training-frame columns that are not label columns)
feature.names = n[!n %in% label.names]

Full Model

# Ordinary least squares fit of the full formula (all principal components)
model.full <- lm(formula, data = data.train)
summary(model.full)
## 
## Call:
## lm(formula = formula, data = data.train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.559  -6.528  -1.910   4.587  58.176 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 125.552547   0.127282 986.415  < 2e-16 ***
## PC1          -0.130695   0.011101 -11.773  < 2e-16 ***
## PC2          -0.260821   0.011152 -23.387  < 2e-16 ***
## PC3          -0.124379   0.011250 -11.056  < 2e-16 ***
## PC4          -0.096620   0.011449  -8.439  < 2e-16 ***
## PC5           0.051682   0.011796   4.381 1.20e-05 ***
## PC6          -0.032606   0.011811  -2.761 0.005789 ** 
## PC7          -0.049668   0.012120  -4.098 4.23e-05 ***
## PC8          -0.012792   0.012292  -1.041 0.298055    
## PC9          -0.013346   0.012711  -1.050 0.293785    
## PC10         -0.006503   0.012877  -0.505 0.613595    
## PC11         -0.146129   0.013684 -10.679  < 2e-16 ***
## PC12         -0.148741   0.014590 -10.195  < 2e-16 ***
## PC13          0.088756   0.014742   6.021 1.85e-09 ***
## PC14          0.078830   0.015268   5.163 2.52e-07 ***
## PC15         -0.006925   0.015443  -0.448 0.653847    
## PC16          0.107353   0.015781   6.802 1.14e-11 ***
## PC17         -0.066451   0.016535  -4.019 5.93e-05 ***
## PC18         -0.111549   0.017366  -6.423 1.45e-10 ***
## PC19          0.013614   0.017579   0.774 0.438712    
## PC20          0.123055   0.019113   6.438 1.31e-10 ***
## PC21          0.024074   0.019904   1.210 0.226522    
## PC22          0.037252   0.031219   1.193 0.232828    
## PC23          0.070534   0.038414   1.836 0.066393 .  
## PC24         -0.214852   0.045052  -4.769 1.90e-06 ***
## PC25          0.031963   0.050488   0.633 0.526715    
## PC26          0.100858   0.051897   1.943 0.052016 .  
## PC27          0.095112   0.052006   1.829 0.067477 .  
## PC28          0.043043   0.052392   0.822 0.411363    
## PC29          0.151608   0.057843   2.621 0.008791 ** 
## PC30          0.020218   0.058540   0.345 0.729832    
## PC31         -0.061870   0.062976  -0.982 0.325927    
## PC32         -0.225948   0.063700  -3.547 0.000393 ***
## PC33          0.074412   0.065336   1.139 0.254790    
## PC34          0.349027   0.068708   5.080 3.90e-07 ***
## PC35         -0.001661   0.073756  -0.023 0.982031    
## PC36         -0.013098   0.074961  -0.175 0.861300    
## PC37         -0.122004   0.077447  -1.575 0.115243    
## PC38         -0.006577   0.080756  -0.081 0.935089    
## PC39         -0.052624   0.081769  -0.644 0.519886    
## PC40         -0.109715   0.082730  -1.326 0.184836    
## PC41          0.026978   0.084442   0.319 0.749371    
## PC42         -0.056771   0.084511  -0.672 0.501768    
## PC43          0.003741   0.085730   0.044 0.965199    
## PC44          0.029723   0.086155   0.345 0.730110    
## PC45         -0.016850   0.086190  -0.196 0.845006    
## PC46          0.123263   0.087149   1.414 0.157300    
## PC47         -0.128422   0.087421  -1.469 0.141890    
## PC48          0.025183   0.088223   0.285 0.775313    
## PC49         -0.023187   0.089192  -0.260 0.794897    
## PC50         -0.093576   0.090436  -1.035 0.300841    
## PC51          0.022880   0.092131   0.248 0.803881    
## PC52         -0.023793   0.092430  -0.257 0.796870    
## PC53          0.053486   0.090206   0.593 0.553254    
## PC54         -0.047300   0.091585  -0.516 0.605552    
## PC55          0.027094   0.092434   0.293 0.769447    
## PC56         -0.002177   0.093172  -0.023 0.981360    
## PC57         -0.158245   0.094245  -1.679 0.093195 .  
## PC58         -0.027666   0.094282  -0.293 0.769203    
## PC59          0.249173   0.093780   2.657 0.007907 ** 
## PC60         -0.107319   0.093769  -1.145 0.252461    
## PC61          0.103938   0.095152   1.092 0.274735    
## PC62         -0.122479   0.095046  -1.289 0.197583    
## PC63         -0.108455   0.095320  -1.138 0.255252    
## PC64         -0.233860   0.096113  -2.433 0.014999 *  
## PC65         -0.041726   0.096420  -0.433 0.665214    
## PC66         -0.140816   0.097319  -1.447 0.147968    
## PC67         -0.036982   0.097679  -0.379 0.704995    
## PC68          0.272055   0.099050   2.747 0.006041 ** 
## PC69          0.115549   0.099189   1.165 0.244096    
## PC70         -0.019454   0.098789  -0.197 0.843892    
## PC71          0.256983   0.098973   2.596 0.009444 ** 
## PC72         -0.008482   0.100528  -0.084 0.932765    
## PC73          0.060640   0.100446   0.604 0.546065    
## PC74         -0.106636   0.100356  -1.063 0.288020    
## PC75         -0.201567   0.100584  -2.004 0.045124 *  
## PC76          0.018524   0.101266   0.183 0.854862    
## PC77          0.162363   0.101896   1.593 0.111125    
## PC78          0.057342   0.102247   0.561 0.574943    
## PC79          0.118704   0.103255   1.150 0.250352    
## PC80         -0.089778   0.102873  -0.873 0.382864    
## PC81          0.214793   0.104055   2.064 0.039044 *  
## PC82          0.109176   0.104576   1.044 0.296536    
## PC83         -0.249264   0.104612  -2.383 0.017218 *  
## PC84          0.218837   0.104554   2.093 0.036391 *  
## PC85          0.352160   0.105441   3.340 0.000844 ***
## PC86         -0.083950   0.105906  -0.793 0.427995    
## PC87          0.454672   0.106814   4.257 2.11e-05 ***
## PC88         -0.208224   0.107420  -1.938 0.052625 .  
## PC89         -0.198925   0.107888  -1.844 0.065265 .  
## PC90         -0.188013   0.106515  -1.765 0.077597 .  
## PC91          0.069569   0.107226   0.649 0.516488    
## PC92          0.043564   0.108521   0.401 0.688120    
## PC93         -0.014071   0.108731  -0.129 0.897035    
## PC94         -0.219505   0.109094  -2.012 0.044262 *  
## PC95          0.008199   0.108770   0.075 0.939916    
## PC96         -0.213668   0.110188  -1.939 0.052538 .  
## PC97         -0.137448   0.109913  -1.251 0.211165    
## PC98         -0.088292   0.109431  -0.807 0.419805    
## PC99         -0.134524   0.109136  -1.233 0.217769    
## PC100         0.007040   0.110538   0.064 0.949221    
## PC101        -0.119594   0.109832  -1.089 0.276257    
## PC102        -0.219019   0.110680  -1.979 0.047884 *  
## PC103         0.114483   0.111035   1.031 0.302558    
## PC104        -0.159338   0.111205  -1.433 0.151963    
## PC105         0.174623   0.110913   1.574 0.115451    
## PC106         0.292742   0.111354   2.629 0.008589 ** 
## PC107        -0.050182   0.111351  -0.451 0.652252    
## PC108         0.183346   0.111864   1.639 0.101270    
## PC109        -0.037107   0.112625  -0.329 0.741809    
## PC110        -0.064925   0.112004  -0.580 0.562162    
## PC111        -0.176545   0.112333  -1.572 0.116098    
## PC112        -0.015230   0.112019  -0.136 0.891856    
## PC113         0.101870   0.112348   0.907 0.364586    
## PC114        -0.149983   0.112638  -1.332 0.183062    
## PC115        -0.411842   0.112941  -3.647 0.000268 ***
## PC116        -0.056480   0.112580  -0.502 0.615904    
## PC117        -0.022949   0.112846  -0.203 0.838860    
## PC118         0.161193   0.112775   1.429 0.152966    
## PC119        -0.233878   0.113487  -2.061 0.039366 *  
## PC120         0.069873   0.113676   0.615 0.538797    
## PC121        -0.110230   0.113963  -0.967 0.333467    
## PC122         0.163345   0.115118   1.419 0.155977    
## PC123        -0.244226   0.113240  -2.157 0.031072 *  
## PC124         0.144407   0.113631   1.271 0.203838    
## PC125         0.111729   0.114700   0.974 0.330049    
## PC126         0.126903   0.114953   1.104 0.269661    
## PC127         0.062068   0.114381   0.543 0.587399    
## PC128        -0.160221   0.115043  -1.393 0.163768    
## PC129        -0.087781   0.115363  -0.761 0.446743    
## PC130         0.095916   0.115129   0.833 0.404811    
## PC131        -0.304885   0.115152  -2.648 0.008128 ** 
## PC132         0.069923   0.114831   0.609 0.542600    
## PC133         0.050821   0.114804   0.443 0.658019    
## PC134         0.235247   0.116936   2.012 0.044294 *  
## PC135         0.201328   0.115932   1.737 0.082514 .  
## PC136         0.105721   0.115691   0.914 0.360850    
## PC137        -0.129377   0.116676  -1.109 0.267541    
## PC138         0.146849   0.115649   1.270 0.204214    
## PC139        -0.214533   0.116354  -1.844 0.065267 .  
## PC140        -0.054362   0.116504  -0.467 0.640796    
## PC141         0.094664   0.116665   0.811 0.417161    
## PC142        -0.079940   0.117418  -0.681 0.496014    
## PC143         0.086901   0.117155   0.742 0.458261    
## PC144         0.377176   0.117929   3.198 0.001390 ** 
## PC145         0.069323   0.117751   0.589 0.556073    
## PC146         0.279495   0.118014   2.368 0.017904 *  
## PC147         0.041163   0.117546   0.350 0.726216    
## PC148        -0.077193   0.118643  -0.651 0.515308    
## PC149         0.043515   0.117895   0.369 0.712069    
## PC150         0.077255   0.118249   0.653 0.513574    
## PC151         0.156719   0.118263   1.325 0.185171    
## PC152        -0.028463   0.117958  -0.241 0.809334    
## PC153         0.148012   0.119046   1.243 0.213806    
## PC154        -0.189009   0.119413  -1.583 0.113523    
## PC155         0.208139   0.118623   1.755 0.079381 .  
## PC156         0.253808   0.119154   2.130 0.033209 *  
## PC157         0.018220   0.119371   0.153 0.878694    
## PC158        -0.090117   0.118652  -0.760 0.447580    
## PC159         0.460142   0.119420   3.853 0.000118 ***
## PC160        -0.024040   0.119045  -0.202 0.839971    
## PC161         0.095285   0.118905   0.801 0.422965    
## PC162        -0.400365   0.119876  -3.340 0.000844 ***
## PC163         0.312879   0.120148   2.604 0.009236 ** 
## PC164         0.073022   0.120239   0.607 0.543673    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.488 on 5419 degrees of freedom
## Multiple R-squared:  0.2434, Adjusted R-squared:  0.2205 
## F-statistic: 10.63 on 164 and 5419 DF,  p-value: < 2.2e-16
cd.full = plot.diagnostics(model=model.full, train=data.train)

## [1] "Number of data points that have Cook's D > 4/n: 266"
## [1] "Number of data points that have Cook's D > 1: 0"

Checking with removal of high influence points

# Row names whose Cook's distance exceeds the conventional 4/n cutoff
high.cd <- names(cd.full[cd.full > 4 / nrow(data.train)])

# Dataset with the high-Cook's-distance rows flagged (export optional below)
t <- data.train %>%
  rownames_to_column() %>%
  mutate(high.cd = ifelse(rowname %in% high.cd, 1, 0))
# write.csv(t, file = 'data_high_cd_flag.csv', row.names = F)

# Refit the full model with the high-influence rows dropped
# (%in% binds tighter than !, so no extra parentheses are needed)
data.train2 <- data.train[!rownames(data.train) %in% high.cd, ]
model.full2 <- lm(formula, data.train2)
summary(model.full2)
## 
## Call:
## lm(formula = formula, data = data.train2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.097  -5.541  -1.139   4.651  25.298 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  1.244e+02  1.026e-01 1211.661  < 2e-16 ***
## PC1         -1.387e-01  9.135e-03  -15.185  < 2e-16 ***
## PC2         -2.528e-01  9.028e-03  -28.006  < 2e-16 ***
## PC3         -1.294e-01  9.122e-03  -14.183  < 2e-16 ***
## PC4         -1.084e-01  9.265e-03  -11.705  < 2e-16 ***
## PC5          4.798e-02  9.554e-03    5.022 5.30e-07 ***
## PC6         -1.901e-02  9.574e-03   -1.986 0.047121 *  
## PC7         -5.146e-02  9.796e-03   -5.253 1.56e-07 ***
## PC8         -1.544e-02  9.978e-03   -1.548 0.121803    
## PC9          3.474e-03  1.028e-02    0.338 0.735539    
## PC10         1.969e-03  1.042e-02    0.189 0.850125    
## PC11        -1.659e-01  1.104e-02  -15.026  < 2e-16 ***
## PC12        -1.532e-01  1.175e-02  -13.033  < 2e-16 ***
## PC13         8.593e-02  1.192e-02    7.208 6.49e-13 ***
## PC14         7.124e-02  1.230e-02    5.791 7.41e-09 ***
## PC15        -1.274e-02  1.250e-02   -1.020 0.307851    
## PC16         9.144e-02  1.274e-02    7.178 8.10e-13 ***
## PC17        -6.861e-02  1.332e-02   -5.150 2.70e-07 ***
## PC18        -1.098e-01  1.398e-02   -7.860 4.66e-15 ***
## PC19         2.271e-02  1.421e-02    1.598 0.110079    
## PC20         1.257e-01  1.543e-02    8.142 4.82e-16 ***
## PC21         2.261e-02  1.605e-02    1.409 0.158967    
## PC22         6.784e-02  2.518e-02    2.695 0.007069 ** 
## PC23         7.010e-02  3.151e-02    2.225 0.026138 *  
## PC24        -2.453e-01  3.655e-02   -6.711 2.15e-11 ***
## PC25         7.240e-02  4.102e-02    1.765 0.077614 .  
## PC26         3.136e-02  4.210e-02    0.745 0.456394    
## PC27         3.908e-02  4.222e-02    0.926 0.354654    
## PC28         1.435e-02  4.251e-02    0.338 0.735715    
## PC29         1.525e-01  4.670e-02    3.266 0.001097 ** 
## PC30         2.199e-02  4.770e-02    0.461 0.644878    
## PC31        -5.876e-02  5.107e-02   -1.151 0.249986    
## PC32        -1.903e-01  5.147e-02   -3.697 0.000220 ***
## PC33        -4.745e-03  5.336e-02   -0.089 0.929149    
## PC34         3.395e-01  5.543e-02    6.125 9.75e-10 ***
## PC35         3.165e-02  6.021e-02    0.526 0.599208    
## PC36        -3.541e-02  6.089e-02   -0.582 0.560833    
## PC37        -1.356e-01  6.257e-02   -2.167 0.030289 *  
## PC38        -4.588e-04  6.511e-02   -0.007 0.994378    
## PC39        -3.762e-02  6.945e-02   -0.542 0.588071    
## PC40        -1.299e-01  6.773e-02   -1.918 0.055170 .  
## PC41        -5.088e-02  6.905e-02   -0.737 0.461240    
## PC42         6.146e-02  6.933e-02    0.886 0.375395    
## PC43         1.560e-01  7.018e-02    2.223 0.026291 *  
## PC44        -3.346e-02  7.183e-02   -0.466 0.641382    
## PC45         3.110e-02  7.017e-02    0.443 0.657595    
## PC46         1.095e-01  7.095e-02    1.544 0.122717    
## PC47        -1.575e-01  7.154e-02   -2.202 0.027712 *  
## PC48         7.557e-02  7.201e-02    1.049 0.294045    
## PC49         5.399e-02  7.307e-02    0.739 0.459995    
## PC50        -1.241e-01  7.415e-02   -1.673 0.094345 .  
## PC51         1.178e-01  7.581e-02    1.554 0.120358    
## PC52        -3.642e-02  7.552e-02   -0.482 0.629676    
## PC53         1.090e-01  7.357e-02    1.482 0.138433    
## PC54        -8.821e-02  7.536e-02   -1.170 0.241877    
## PC55        -6.475e-02  7.589e-02   -0.853 0.393592    
## PC56        -2.840e-02  7.639e-02   -0.372 0.710088    
## PC57        -1.473e-01  7.682e-02   -1.917 0.055277 .  
## PC58        -1.075e-01  7.686e-02   -1.399 0.161973    
## PC59         3.114e-01  7.691e-02    4.049 5.22e-05 ***
## PC60        -1.289e-01  7.680e-02   -1.678 0.093377 .  
## PC61         4.419e-02  7.703e-02    0.574 0.566276    
## PC62        -7.458e-02  7.749e-02   -0.962 0.335849    
## PC63        -9.389e-02  7.790e-02   -1.205 0.228184    
## PC64        -2.012e-01  7.842e-02   -2.566 0.010316 *  
## PC65         2.832e-03  7.868e-02    0.036 0.971287    
## PC66        -6.708e-02  7.975e-02   -0.841 0.400284    
## PC67        -1.056e-02  7.957e-02   -0.133 0.894429    
## PC68         2.440e-01  8.070e-02    3.023 0.002516 ** 
## PC69         1.354e-01  8.088e-02    1.675 0.094085 .  
## PC70         3.295e-02  7.998e-02    0.412 0.680405    
## PC71         1.313e-01  8.018e-02    1.637 0.101678    
## PC72        -2.922e-02  8.155e-02   -0.358 0.720132    
## PC73         1.128e-01  8.138e-02    1.386 0.165818    
## PC74        -1.604e-03  8.174e-02   -0.020 0.984343    
## PC75        -1.044e-01  8.160e-02   -1.279 0.200901    
## PC76        -1.006e-01  8.203e-02   -1.226 0.220298    
## PC77         1.535e-01  8.273e-02    1.856 0.063575 .  
## PC78        -3.233e-02  8.284e-02   -0.390 0.696358    
## PC79         1.489e-01  8.403e-02    1.772 0.076441 .  
## PC80        -4.721e-02  8.317e-02   -0.568 0.570284    
## PC81         2.585e-01  8.417e-02    3.071 0.002147 ** 
## PC82         6.161e-02  8.460e-02    0.728 0.466505    
## PC83        -1.851e-01  8.523e-02   -2.172 0.029901 *  
## PC84         2.278e-01  8.484e-02    2.686 0.007265 ** 
## PC85         4.218e-01  8.563e-02    4.927 8.63e-07 ***
## PC86         1.149e-02  8.551e-02    0.134 0.893133    
## PC87         4.089e-01  8.631e-02    4.737 2.23e-06 ***
## PC88        -2.217e-01  8.712e-02   -2.545 0.010959 *  
## PC89        -1.354e-01  8.731e-02   -1.551 0.120950    
## PC90        -1.792e-01  8.648e-02   -2.072 0.038331 *  
## PC91         2.582e-03  8.658e-02    0.030 0.976207    
## PC92         1.286e-01  8.773e-02    1.466 0.142639    
## PC93        -1.617e-01  8.827e-02   -1.832 0.067056 .  
## PC94        -1.226e-01  8.822e-02   -1.390 0.164621    
## PC95         3.811e-02  8.814e-02    0.432 0.665512    
## PC96        -1.657e-01  8.891e-02   -1.863 0.062472 .  
## PC97        -4.926e-02  8.898e-02   -0.554 0.579872    
## PC98        -1.126e-01  8.831e-02   -1.275 0.202522    
## PC99        -4.562e-02  8.818e-02   -0.517 0.604946    
## PC100       -7.345e-02  8.908e-02   -0.825 0.409672    
## PC101       -1.919e-01  8.886e-02   -2.159 0.030888 *  
## PC102       -1.218e-01  8.941e-02   -1.363 0.173022    
## PC103        1.038e-01  8.994e-02    1.154 0.248617    
## PC104       -1.399e-01  8.947e-02   -1.564 0.117915    
## PC105        1.565e-01  8.978e-02    1.743 0.081363 .  
## PC106        2.343e-01  8.986e-02    2.607 0.009148 ** 
## PC107        5.481e-03  9.064e-02    0.060 0.951781    
## PC108        8.837e-02  9.018e-02    0.980 0.327152    
## PC109       -1.969e-02  9.090e-02   -0.217 0.828523    
## PC110       -9.410e-02  9.036e-02   -1.041 0.297710    
## PC111       -1.996e-01  9.095e-02   -2.195 0.028226 *  
## PC112       -2.590e-02  9.058e-02   -0.286 0.774906    
## PC113        6.579e-02  9.060e-02    0.726 0.467795    
## PC114       -1.409e-01  9.108e-02   -1.547 0.121814    
## PC115       -4.692e-01  9.130e-02   -5.140 2.86e-07 ***
## PC116       -1.894e-02  9.077e-02   -0.209 0.834745    
## PC117        6.540e-04  9.108e-02    0.007 0.994271    
## PC118        9.801e-02  9.105e-02    1.076 0.281790    
## PC119       -1.983e-01  9.167e-02   -2.163 0.030586 *  
## PC120        9.382e-02  9.158e-02    1.024 0.305677    
## PC121       -1.873e-01  9.187e-02   -2.038 0.041569 *  
## PC122        1.430e-01  9.292e-02    1.539 0.123894    
## PC123       -2.310e-01  9.140e-02   -2.527 0.011520 *  
## PC124        6.071e-02  9.207e-02    0.659 0.509678    
## PC125        2.194e-01  9.287e-02    2.363 0.018168 *  
## PC126        1.036e-01  9.276e-02    1.117 0.264109    
## PC127       -4.902e-02  9.210e-02   -0.532 0.594567    
## PC128       -1.509e-01  9.284e-02   -1.625 0.104202    
## PC129       -8.941e-03  9.369e-02   -0.095 0.923975    
## PC130        9.677e-02  9.305e-02    1.040 0.298415    
## PC131       -1.550e-01  9.291e-02   -1.668 0.095309 .  
## PC132        3.561e-02  9.255e-02    0.385 0.700426    
## PC133        5.425e-02  9.304e-02    0.583 0.559879    
## PC134        1.257e-01  9.459e-02    1.328 0.184083    
## PC135        1.085e-01  9.369e-02    1.158 0.246992    
## PC136        1.295e-01  9.354e-02    1.384 0.166366    
## PC137       -1.720e-01  9.392e-02   -1.831 0.067150 .  
## PC138        1.324e-01  9.367e-02    1.414 0.157482    
## PC139       -1.842e-01  9.436e-02   -1.952 0.051013 .  
## PC140       -8.253e-02  9.404e-02   -0.878 0.380174    
## PC141        1.226e-01  9.412e-02    1.302 0.192885    
## PC142        4.906e-02  9.475e-02    0.518 0.604650    
## PC143        1.120e-01  9.432e-02    1.187 0.235191    
## PC144        2.729e-01  9.545e-02    2.859 0.004266 ** 
## PC145        1.477e-01  9.537e-02    1.549 0.121470    
## PC146        3.755e-01  9.501e-02    3.952 7.86e-05 ***
## PC147        4.275e-03  9.518e-02    0.045 0.964179    
## PC148       -4.341e-02  9.531e-02   -0.455 0.648828    
## PC149        3.865e-02  9.485e-02    0.407 0.683680    
## PC150        1.232e-01  9.567e-02    1.287 0.198046    
## PC151        1.201e-01  9.545e-02    1.258 0.208429    
## PC152       -2.326e-02  9.544e-02   -0.244 0.807495    
## PC153        8.612e-02  9.566e-02    0.900 0.368008    
## PC154       -8.589e-02  9.650e-02   -0.890 0.373527    
## PC155        1.911e-01  9.561e-02    1.999 0.045664 *  
## PC156        1.394e-01  9.626e-02    1.448 0.147738    
## PC157        9.014e-02  9.641e-02    0.935 0.349833    
## PC158       -5.956e-02  9.611e-02   -0.620 0.535430    
## PC159        3.276e-01  9.628e-02    3.403 0.000672 ***
## PC160        3.572e-02  9.681e-02    0.369 0.712132    
## PC161        2.109e-02  9.597e-02    0.220 0.826088    
## PC162       -4.138e-01  9.666e-02   -4.281 1.89e-05 ***
## PC163        3.029e-01  9.729e-02    3.113 0.001861 ** 
## PC164        6.891e-02  9.704e-02    0.710 0.477643    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.458 on 5153 degrees of freedom
## Multiple R-squared:  0.3409, Adjusted R-squared:  0.3199 
## F-statistic: 16.25 on 164 and 5153 DF,  p-value: < 2.2e-16
# Diagnostics for the refit without the high-influence rows
cd.full2 = plot.diagnostics(model.full2, data.train2)

## [1] "Number of data points that have Cook's D > 4/n: 254"
## [1] "Number of data points that have Cook's D > 1: 0"
# much more normal residuals than before. 
# Checking to see if distributions are different and if so which variables
# High Leverage Plot: target distribution for flagged ("High") vs normal rows
plotData = data.train %>% 
  rownames_to_column() %>%
  mutate(type=ifelse(rowname %in% high.cd,'High','Normal')) %>%
  dplyr::select(type,target=one_of(label.names))

ggplot(data=plotData, aes(x=type,y=target)) +
  geom_boxplot(fill='light blue',outlier.shape=NA) +
  scale_y_continuous(name="Target Variable Values",label=scales::comma_format(accuracy=.1)) +
  theme_light() +
  ggtitle('Distribution of High Leverage Points and Normal  Points')

# 2 sample t-tests: compare each feature across the High/Normal split

plotData = data.train %>% 
  rownames_to_column() %>%
  mutate(type=ifelse(rowname %in% high.cd,'High','Normal')) %>%
  dplyr::select(type,one_of(feature.names))

# Equal-variance two-sample t-test per feature against the group label
comp.test = lapply(dplyr::select(plotData, one_of(feature.names))
                   , function(x) t.test(x ~ plotData$type, var.equal = TRUE)) 

# Keep only features whose group means differ at the 0.05 level
sig.comp = list.filter(comp.test, p.value < 0.05)
sapply(sig.comp, function(x) x[['p.value']])
##          PC1          PC6         PC11         PC23         PC24         PC25         PC26         PC33         PC41 
## 0.0001072349 0.0058606124 0.0011053789 0.0011736711 0.0111317657 0.0009296367 0.0002676658 0.0409342966 0.0402243783 
##         PC43         PC44         PC46         PC76         PC97        PC131        PC138        PC159 
## 0.0238913140 0.0009881893 0.0130897563 0.0233459332 0.0481640017 0.0051489325 0.0408374166 0.0385634058
# Box plots restricted to the significantly different features
mm = melt(plotData, id=c('type')) %>% filter(variable %in% names(sig.comp))

ggplot(mm,aes(x=type, y=value)) +
  geom_boxplot()+
  facet_wrap(~variable, ncol=5, scales = 'free_y') +
  scale_y_continuous(name="values",label=scales::comma_format(accuracy=.1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

# Distribution (box) Plots for all features
mm = melt(plotData, id=c('type'))

ggplot(mm,aes(x=type, y=value)) +
  geom_boxplot()+
  facet_wrap(~variable, ncol=8, scales = 'free_y') +
  scale_y_continuous(name="values",label=scales::comma_format(accuracy=.1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

Grand Means Model

# Intercept-only baseline model (grand mean of the response)
model.null <- lm(grand.mean.formula, data = data.train)
summary(model.null)
## 
## Call:
## lm(formula = grand.mean.formula, data = data.train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.579  -7.113  -1.354   5.639  61.481 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 125.4914     0.1438   872.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.75 on 5583 degrees of freedom

Variable Selection

Basic: http://www.stat.columbia.edu/~martin/W2024/R10.pdf Cross Validation + Other Metrics: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/

Forward Selection with CV

Train

# Forward selection via the project helper (method "leapForward"); the helper
# presumably cross-validates over nvmax with caret -- see its definition.
if (algo.forward.caret) {
  set.seed(1)  # reproducible CV folds
  returned <- train.caret.glmselect(formula = formula,
                                    data = data.train,
                                    method = "leapForward",
                                    feature.names = feature.names)
  model.forward <- returned$model  # selected fit, used in later chunks
  id <- returned$id                # chosen model size, used in later chunks
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 25 on full training set
## [1] "All models results"
##     nvmax      RMSE   Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1       1 10.314398 0.08054327 7.884919 0.4271001 0.02709031 0.2439494
## 2       2 10.199048 0.10109480 7.798425 0.4900334 0.03484458 0.3168454
## 3       3 10.125736 0.11377356 7.728769 0.4980288 0.03380131 0.3271481
## 4       4 10.023439 0.13141280 7.641212 0.5019716 0.03605893 0.3239474
## 5       5  9.925468 0.14801576 7.552602 0.4996998 0.03658746 0.3075944
## 6       6  9.867988 0.15781946 7.492552 0.5218940 0.03947911 0.3374633
## 7       7  9.854531 0.15994439 7.481765 0.5170148 0.03960708 0.3305379
## 8       8  9.824284 0.16494390 7.471437 0.5130926 0.03802914 0.3243163
## 9       9  9.797375 0.16960724 7.451693 0.5057431 0.03806398 0.3244449
## 10     10  9.747838 0.17762966 7.416007 0.5002890 0.03803179 0.3205494
## 11     11  9.740183 0.17898362 7.414736 0.4974425 0.03697085 0.3144536
## 12     12  9.736668 0.17982775 7.409408 0.4927943 0.03807716 0.3166351
## 13     13  9.723374 0.18241267 7.396719 0.4935489 0.04058802 0.3203828
## 14     14  9.710142 0.18453084 7.386957 0.4937527 0.03943505 0.3209735
## 15     15  9.705682 0.18524759 7.383546 0.5000330 0.03877914 0.3263911
## 16     16  9.696500 0.18676637 7.377538 0.4970043 0.03730214 0.3148459
## 17     17  9.674323 0.19036045 7.355053 0.4863771 0.03640112 0.3026000
## 18     18  9.653421 0.19382813 7.336945 0.4819013 0.03653255 0.3013920
## 19     19  9.648811 0.19460511 7.337869 0.4809606 0.03641921 0.2974860
## 20     20  9.640805 0.19584349 7.334312 0.4725456 0.03542158 0.2918549
## 21     21  9.631106 0.19750881 7.326202 0.4619025 0.03464146 0.2877406
## 22     22  9.629274 0.19782504 7.319748 0.4606926 0.03346942 0.2898984
## 23     23  9.619234 0.19946334 7.315399 0.4740728 0.03426493 0.2923705
## 24     24  9.620150 0.19934080 7.312674 0.4714197 0.03361914 0.2939178
## 25     25  9.618917 0.19966723 7.312588 0.4716647 0.03449186 0.2962706
## 26     26  9.623708 0.19893673 7.317046 0.4714566 0.03434947 0.2956297
## 27     27  9.627039 0.19847483 7.320462 0.4801393 0.03458080 0.3061704
## 28     28  9.627234 0.19837556 7.318306 0.4690775 0.03282265 0.2991245
## 29     29  9.624637 0.19885758 7.316869 0.4657579 0.03222623 0.2977682
## 30     30  9.622027 0.19927591 7.315291 0.4712930 0.03298640 0.3037713
## 31     31  9.624224 0.19897155 7.324123 0.4717754 0.03316739 0.3064307
## 32     32  9.625179 0.19886894 7.325359 0.4758757 0.03426587 0.3167161
## 33     33  9.627267 0.19861554 7.324894 0.4830946 0.03504843 0.3216019
## 34     34  9.620108 0.19977267 7.323983 0.4837071 0.03464828 0.3242701
## 35     35  9.622769 0.19926429 7.322787 0.4781829 0.03300448 0.3240712
## 36     36  9.623398 0.19910784 7.318635 0.4793814 0.03311017 0.3251113
## 37     37  9.634104 0.19742675 7.324580 0.4823170 0.03288461 0.3265167
## 38     38  9.636863 0.19707855 7.326810 0.4842750 0.03258278 0.3218998
## 39     39  9.636866 0.19709061 7.326940 0.4816608 0.03217041 0.3176460
## 40     40  9.637624 0.19706216 7.331662 0.4801166 0.03246926 0.3177133
## 41     41  9.637170 0.19726070 7.332455 0.4827652 0.03291564 0.3239923
## 42     42  9.635209 0.19752633 7.331015 0.4786513 0.03230496 0.3176523
## 43     43  9.644048 0.19625822 7.342010 0.4770835 0.03214499 0.3165599
## 44     44  9.646134 0.19603882 7.342916 0.4790607 0.03211242 0.3144387
## 45     45  9.645160 0.19621505 7.344346 0.4810358 0.03235679 0.3151963
## 46     46  9.649825 0.19555997 7.344793 0.4827900 0.03250864 0.3180991
## 47     47  9.645080 0.19627428 7.344954 0.4824734 0.03243365 0.3170790
## 48     48  9.644091 0.19648993 7.349758 0.4798667 0.03192693 0.3172891
## 49     49  9.647452 0.19597088 7.352344 0.4821877 0.03178765 0.3145333
## 50     50  9.649302 0.19569868 7.350758 0.4810711 0.03144358 0.3152121
## 51     51  9.649499 0.19573827 7.349735 0.4888994 0.03202542 0.3186445
## 52     52  9.649036 0.19579624 7.348401 0.4842173 0.03145266 0.3148604
## 53     53  9.651952 0.19536610 7.350886 0.4797401 0.03088922 0.3149365
## 54     54  9.645082 0.19648076 7.345811 0.4808532 0.03131329 0.3170108
## 55     55  9.640925 0.19721429 7.343046 0.4849292 0.03188032 0.3212494
## 56     56  9.637013 0.19790079 7.338159 0.4904495 0.03268701 0.3247092
## 57     57  9.638109 0.19777097 7.340913 0.4898962 0.03226083 0.3221252
## 58     58  9.638407 0.19778974 7.339997 0.4858916 0.03179945 0.3205142
## 59     59  9.639943 0.19758818 7.337055 0.4827745 0.03170063 0.3188693
## 60     60  9.640663 0.19750436 7.336194 0.4815449 0.03186693 0.3185537
## 61     61  9.640124 0.19765083 7.337367 0.4833328 0.03179793 0.3186146
## 62     62  9.633940 0.19861069 7.331281 0.4818583 0.03151828 0.3182952
## 63     63  9.628424 0.19948669 7.327147 0.4847013 0.03192254 0.3182572
## 64     64  9.624632 0.20009907 7.326365 0.4819237 0.03189702 0.3192949
## 65     65  9.622465 0.20042856 7.325254 0.4798265 0.03170182 0.3157264
## 66     66  9.622955 0.20034898 7.325146 0.4816697 0.03204894 0.3181711
## 67     67  9.628533 0.19951967 7.328708 0.4777532 0.03161616 0.3146589
## 68     68  9.627700 0.19968550 7.327059 0.4790401 0.03161636 0.3143854
## 69     69  9.628646 0.19956177 7.328556 0.4758252 0.03096929 0.3131130
## 70     70  9.628704 0.19957924 7.331279 0.4744442 0.03061362 0.3089066
## 71     71  9.633963 0.19878019 7.334681 0.4743814 0.03081528 0.3079131
## 72     72  9.632865 0.19893976 7.334863 0.4731412 0.03041359 0.3077718
## 73     73  9.634444 0.19863871 7.335496 0.4738841 0.03074422 0.3128240
## 74     74  9.633199 0.19886446 7.332381 0.4697864 0.03016688 0.3109643
## 75     75  9.634434 0.19870236 7.332797 0.4713104 0.03034128 0.3128872
## 76     76  9.637916 0.19828939 7.334940 0.4708538 0.03065172 0.3126119
## 77     77  9.637252 0.19842780 7.334817 0.4729461 0.03100066 0.3128065
## 78     78  9.637472 0.19842780 7.336893 0.4744315 0.03110637 0.3152575
## 79     79  9.636680 0.19857333 7.336001 0.4804500 0.03160138 0.3196203
## 80     80  9.636986 0.19855864 7.334999 0.4818910 0.03186822 0.3213856
## 81     81  9.635285 0.19884976 7.333501 0.4824718 0.03204367 0.3210905
## 82     82  9.640748 0.19811068 7.337920 0.4810537 0.03205001 0.3203650
## 83     83  9.642481 0.19787745 7.338285 0.4834944 0.03230321 0.3211610
## 84     84  9.646272 0.19732857 7.341442 0.4872627 0.03254754 0.3251286
## 85     85  9.644749 0.19756028 7.339795 0.4868332 0.03254305 0.3271164
## 86     86  9.643731 0.19773806 7.339376 0.4869090 0.03252164 0.3281615
## 87     87  9.645909 0.19748384 7.342140 0.4880268 0.03265441 0.3268914
## 88     88  9.644757 0.19768174 7.341526 0.4880668 0.03294075 0.3269501
## 89     89  9.645023 0.19768281 7.341219 0.4899201 0.03297994 0.3266695
## 90     90  9.644678 0.19774921 7.339831 0.4899577 0.03319705 0.3252407
## 91     91  9.644409 0.19776139 7.337215 0.4893980 0.03319314 0.3242811
## 92     92  9.642856 0.19800183 7.334264 0.4885725 0.03310819 0.3250349
## 93     93  9.642154 0.19812880 7.332556 0.4915675 0.03349869 0.3271316
## 94     94  9.639872 0.19846016 7.331758 0.4902764 0.03325887 0.3255088
## 95     95  9.638005 0.19871153 7.332392 0.4882124 0.03298859 0.3226219
## 96     96  9.641738 0.19818132 7.335622 0.4891497 0.03311914 0.3255611
## 97     97  9.640631 0.19831772 7.335446 0.4872026 0.03269403 0.3251129
## 98     98  9.637355 0.19877654 7.334024 0.4868692 0.03264230 0.3256004
## 99     99  9.637689 0.19870467 7.334804 0.4846126 0.03246350 0.3247642
## 100   100  9.638953 0.19857125 7.335761 0.4870460 0.03286540 0.3285168
## 101   101  9.637922 0.19875072 7.335148 0.4869846 0.03279377 0.3262044
## 102   102  9.639737 0.19847845 7.334673 0.4869977 0.03270639 0.3269458
## 103   103  9.637043 0.19888613 7.333817 0.4860516 0.03252267 0.3258162
## 104   104  9.635660 0.19911137 7.331260 0.4844192 0.03227342 0.3244761
## 105   105  9.633810 0.19939515 7.327971 0.4857133 0.03246504 0.3241686
## 106   106  9.633238 0.19952111 7.328810 0.4861357 0.03266381 0.3246772
## 107   107  9.631306 0.19982553 7.325965 0.4871883 0.03302511 0.3251029
## 108   108  9.631423 0.19984389 7.325989 0.4907414 0.03353751 0.3268670
## 109   109  9.632987 0.19961834 7.326950 0.4920127 0.03363840 0.3277024
## 110   110  9.633933 0.19951000 7.329165 0.4929164 0.03372692 0.3285818
## 111   111  9.635282 0.19932179 7.330047 0.4923225 0.03373661 0.3272854
## 112   112  9.635311 0.19932662 7.330227 0.4921802 0.03375941 0.3269019
## 113   113  9.637317 0.19904490 7.332003 0.4932715 0.03390863 0.3271163
## 114   114  9.638101 0.19892513 7.332506 0.4935269 0.03387177 0.3264111
## 115   115  9.637307 0.19901663 7.332073 0.4936147 0.03385586 0.3265408
## 116   116  9.635376 0.19927917 7.330591 0.4918289 0.03358816 0.3255643
## 117   117  9.634994 0.19933702 7.331559 0.4910203 0.03348498 0.3254264
## 118   118  9.633779 0.19953113 7.331312 0.4946111 0.03394629 0.3266887
## 119   119  9.634948 0.19935903 7.332872 0.4942967 0.03411209 0.3263033
## 120   120  9.635280 0.19930563 7.332574 0.4928928 0.03393856 0.3254215
## 121   121  9.634920 0.19934770 7.332340 0.4926238 0.03384182 0.3254437
## 122   122  9.634375 0.19940941 7.330916 0.4912490 0.03366279 0.3238916
## 123   123  9.634742 0.19938107 7.332308 0.4924775 0.03384391 0.3246004
## 124   124  9.634449 0.19943503 7.331651 0.4931734 0.03398508 0.3245911
## 125   125  9.634523 0.19942949 7.331194 0.4919827 0.03404017 0.3240884
## 126   126  9.635073 0.19936421 7.330728 0.4922135 0.03405362 0.3234224
## 127   127  9.633802 0.19955709 7.329625 0.4936136 0.03422410 0.3241812
## 128   128  9.635959 0.19924004 7.330823 0.4937714 0.03421638 0.3246811
## 129   129  9.635138 0.19935852 7.330553 0.4939452 0.03415657 0.3254823
## 130   130  9.635835 0.19926628 7.331299 0.4944289 0.03422693 0.3263709
## 131   131  9.635837 0.19929391 7.332557 0.4956810 0.03448172 0.3281210
## 132   132  9.634573 0.19949616 7.331093 0.4966212 0.03468216 0.3283526
## 133   133  9.634159 0.19955455 7.330413 0.4957496 0.03465020 0.3280146
## 134   134  9.633833 0.19962523 7.330568 0.4961586 0.03467571 0.3279936
## 135   135  9.632697 0.19979297 7.330565 0.4944520 0.03447396 0.3265192
## 136   136  9.632797 0.19979215 7.330753 0.4940482 0.03443880 0.3264375
## 137   137  9.632161 0.19986596 7.330867 0.4933727 0.03435251 0.3253374
## 138   138  9.631808 0.19991432 7.331033 0.4934708 0.03437552 0.3256485
## 139   139  9.630757 0.20008335 7.330511 0.4936398 0.03442376 0.3255177
## 140   140  9.630653 0.20009826 7.330621 0.4930917 0.03429013 0.3255031
## 141   141  9.630782 0.20008063 7.330643 0.4920849 0.03415062 0.3245322
## 142   142  9.630717 0.20008648 7.330770 0.4922773 0.03410335 0.3243341
## 143   143  9.630938 0.20004469 7.330823 0.4925382 0.03410318 0.3248823
## 144   144  9.631273 0.19998616 7.331325 0.4931110 0.03408971 0.3251739
## 145   145  9.630944 0.20003653 7.330957 0.4930576 0.03407356 0.3250432
## 146   146  9.631400 0.19996883 7.331640 0.4925842 0.03399285 0.3240851
## 147   147  9.631958 0.19989725 7.331798 0.4928402 0.03403765 0.3239224
## 148   148  9.631653 0.19994064 7.331453 0.4931675 0.03406227 0.3242381
## 149   149  9.632159 0.19988227 7.331403 0.4929932 0.03407681 0.3241466
## 150   150  9.632447 0.19983882 7.331939 0.4928258 0.03402221 0.3241302
## 151   151  9.632196 0.19987376 7.331605 0.4930766 0.03398577 0.3242435
## 152   152  9.632032 0.19990266 7.331202 0.4933033 0.03403262 0.3242738
## 153   153  9.632308 0.19986587 7.331162 0.4935183 0.03404603 0.3245987
## 154   154  9.632400 0.19985752 7.331563 0.4933914 0.03403469 0.3246104
## 155   155  9.632458 0.19984612 7.331529 0.4936124 0.03404341 0.3247388
## 156   156  9.632461 0.19983831 7.331613 0.4933489 0.03399701 0.3242616
## 157   157  9.632222 0.19987341 7.331613 0.4936005 0.03404458 0.3244574
## 158   158  9.631946 0.19991440 7.331456 0.4936122 0.03404299 0.3245422
## 159   159  9.632034 0.19990167 7.331550 0.4934646 0.03404531 0.3245381
## 160   160  9.631878 0.19992469 7.331536 0.4934892 0.03404493 0.3244512
## 161   161  9.631809 0.19993444 7.331368 0.4937166 0.03408048 0.3245996
## 162   162  9.631782 0.19993549 7.331476 0.4936692 0.03406819 0.3244681
## 163   163  9.631765 0.19993663 7.331428 0.4936124 0.03405973 0.3244335
## 164   164  9.631757 0.19993708 7.331435 0.4936255 0.03405957 0.3244178
## [1] "Best Model"
##    nvmax
## 25    25

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                 Estimate        2.5 %       97.5 %
## (Intercept) 125.53971849 125.28883081 125.79060618
## PC1          -0.13155296  -0.15340867  -0.10969725
## PC2          -0.26256899  -0.28454276  -0.24059521
## PC3          -0.12415708  -0.14631202  -0.10200214
## PC4          -0.09550221  -0.11805332  -0.07295111
## PC5           0.05224619   0.02900617   0.07548621
## PC7          -0.05076737  -0.07462249  -0.02691225
## PC11         -0.14811447  -0.17507997  -0.12114898
## PC12         -0.14975367  -0.17850496  -0.12100239
## PC13          0.08922680   0.06019911   0.11825449
## PC14          0.07847991   0.04839389   0.10856593
## PC16          0.10772985   0.07664904   0.13881065
## PC17         -0.06536087  -0.09794315  -0.03277859
## PC18         -0.11027784  -0.14449562  -0.07606006
## PC20          0.12375601   0.08610353   0.16140849
## PC24         -0.20796590  -0.29669653  -0.11923527
## PC32         -0.22627313  -0.35174041  -0.10080586
## PC34          0.34413952   0.20877870   0.47950034
## PC71          0.26455787   0.06967743   0.45943832
## PC85          0.34616931   0.13853396   0.55380465
## PC87          0.45096243   0.24053728   0.66138758
## PC115        -0.41723819  -0.63976712  -0.19470927
## PC131        -0.31769431  -0.54455701  -0.09083161
## PC144         0.37621373   0.14391357   0.60851390
## PC159         0.44676508   0.21161430   0.68191585
## PC162        -0.39020390  -0.62632711  -0.15408070

Test

# Evaluate the trained forward-selection (leapForward) model on the
# held-out test set. test.model() prints a summary of predictions and
# the test MSE/RMSE on both the model scale and the original scale.
# isTRUE() is safer than `== TRUE`: it skips cleanly if the param is
# NA/NULL instead of erroring inside if().
# NOTE(review): `transformation = t` — confirm `t` is a transform
# object defined earlier and not base::t (matrix transpose).
if (isTRUE(algo.forward.caret)) {
    test.model(model = model.forward, test = data.test
             ,method = 'leapForward',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   107.5   123.0   126.5   125.7   129.3   135.6 
## [1] "leapForward  Test MSE: 93.778539216204"
## [1] "leapForward  Test RMSE: 9.68393201216345"
## [1] "leapForward  Test MSE (Org Scale): 93.778539216204"
## [1] "leapForward  Test RMSE (Org Scale): 9.68393201216345"

Backward Elimination with CV

Train

# Train a backward-elimination model (caret method "leapBackward")
# with cross-validation via the project helper train.caret.glmselect().
# The helper returns a list; we keep the fitted model and a run id used
# later by test.model(). set.seed(1) makes CV fold assignment
# reproducible across the forward/backward/stepwise chunks.
if (isTRUE(algo.backward.caret)) {
  set.seed(1)
  returned <- train.caret.glmselect(formula = formula
                                    ,data =  data.train
                                    ,method = "leapBackward"
                                    ,feature.names =  feature.names)
  model.backward <- returned$model
  id <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 25 on full training set
## [1] "All models results"
##     nvmax      RMSE   Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1       1 10.314398 0.08054327 7.884919 0.4271001 0.02709031 0.2439494
## 2       2 10.199048 0.10109480 7.798425 0.4900334 0.03484458 0.3168454
## 3       3 10.125736 0.11377356 7.728769 0.4980288 0.03380131 0.3271481
## 4       4 10.023439 0.13141280 7.641212 0.5019716 0.03605893 0.3239474
## 5       5  9.925468 0.14801576 7.552602 0.4996998 0.03658746 0.3075944
## 6       6  9.867988 0.15781946 7.492552 0.5218940 0.03947911 0.3374633
## 7       7  9.854531 0.15994439 7.481765 0.5170148 0.03960708 0.3305379
## 8       8  9.824284 0.16494390 7.471437 0.5130926 0.03802914 0.3243163
## 9       9  9.797375 0.16960724 7.451693 0.5057431 0.03806398 0.3244449
## 10     10  9.747838 0.17762966 7.416007 0.5002890 0.03803179 0.3205494
## 11     11  9.740183 0.17898362 7.414736 0.4974425 0.03697085 0.3144536
## 12     12  9.736668 0.17982775 7.409408 0.4927943 0.03807716 0.3166351
## 13     13  9.723374 0.18241267 7.396719 0.4935489 0.04058802 0.3203828
## 14     14  9.710142 0.18453084 7.386957 0.4937527 0.03943505 0.3209735
## 15     15  9.705682 0.18524759 7.383546 0.5000330 0.03877914 0.3263911
## 16     16  9.696500 0.18676637 7.377538 0.4970043 0.03730214 0.3148459
## 17     17  9.674323 0.19036045 7.355053 0.4863771 0.03640112 0.3026000
## 18     18  9.653421 0.19382813 7.336945 0.4819013 0.03653255 0.3013920
## 19     19  9.648811 0.19460511 7.337869 0.4809606 0.03641921 0.2974860
## 20     20  9.640805 0.19584349 7.334312 0.4725456 0.03542158 0.2918549
## 21     21  9.631106 0.19750881 7.326202 0.4619025 0.03464146 0.2877406
## 22     22  9.629274 0.19782504 7.319748 0.4606926 0.03346942 0.2898984
## 23     23  9.619234 0.19946334 7.315399 0.4740728 0.03426493 0.2923705
## 24     24  9.620150 0.19934080 7.312674 0.4714197 0.03361914 0.2939178
## 25     25  9.618917 0.19966723 7.312588 0.4716647 0.03449186 0.2962706
## 26     26  9.623708 0.19893673 7.317046 0.4714566 0.03434947 0.2956297
## 27     27  9.627039 0.19847483 7.320462 0.4801393 0.03458080 0.3061704
## 28     28  9.627609 0.19831751 7.318180 0.4697247 0.03289685 0.2989354
## 29     29  9.623764 0.19899899 7.316502 0.4725575 0.03326723 0.2985276
## 30     30  9.619851 0.19962591 7.315416 0.4756054 0.03372327 0.3034597
## 31     31  9.624224 0.19897155 7.324123 0.4717754 0.03316739 0.3064307
## 32     32  9.625179 0.19886894 7.325359 0.4758757 0.03426587 0.3167161
## 33     33  9.627337 0.19861048 7.325259 0.4830518 0.03504374 0.3213584
## 34     34  9.626849 0.19863301 7.326118 0.4793046 0.03402334 0.3228997
## 35     35  9.625014 0.19891473 7.324586 0.4764846 0.03303804 0.3234589
## 36     36  9.627385 0.19852215 7.321155 0.4862141 0.03389284 0.3290649
## 37     37  9.634632 0.19734518 7.324639 0.4832365 0.03299993 0.3266106
## 38     38  9.636863 0.19707855 7.326810 0.4842750 0.03258278 0.3218998
## 39     39  9.636866 0.19709061 7.326940 0.4816608 0.03217041 0.3176460
## 40     40  9.637624 0.19706216 7.331662 0.4801166 0.03246926 0.3177133
## 41     41  9.637170 0.19726070 7.332455 0.4827652 0.03291564 0.3239923
## 42     42  9.635209 0.19752633 7.331015 0.4786513 0.03230496 0.3176523
## 43     43  9.644048 0.19625822 7.342010 0.4770835 0.03214499 0.3165599
## 44     44  9.643705 0.19642824 7.342231 0.4763202 0.03192094 0.3139294
## 45     45  9.639852 0.19705398 7.343501 0.4769665 0.03181261 0.3152953
## 46     46  9.642390 0.19671863 7.342911 0.4763663 0.03180864 0.3174432
## 47     47  9.642987 0.19659628 7.346236 0.4801213 0.03227406 0.3180657
## 48     48  9.644091 0.19648993 7.349758 0.4798667 0.03192693 0.3172891
## 49     49  9.647452 0.19597088 7.352344 0.4821877 0.03178765 0.3145333
## 50     50  9.649638 0.19562552 7.352013 0.4807571 0.03139963 0.3145837
## 51     51  9.648766 0.19583917 7.349302 0.4895776 0.03208614 0.3188607
## 52     52  9.649036 0.19579624 7.348401 0.4842173 0.03145266 0.3148604
## 53     53  9.651952 0.19536610 7.350886 0.4797401 0.03088922 0.3149365
## 54     54  9.645879 0.19638340 7.346858 0.4849839 0.03155713 0.3198481
## 55     55  9.641255 0.19713095 7.343260 0.4872314 0.03187938 0.3223013
## 56     56  9.634455 0.19823908 7.334609 0.4852839 0.03180882 0.3214321
## 57     57  9.638225 0.19774653 7.338815 0.4892439 0.03236620 0.3236202
## 58     58  9.639753 0.19755560 7.338282 0.4840383 0.03181736 0.3217224
## 59     59  9.641443 0.19733518 7.337813 0.4813345 0.03160042 0.3184130
## 60     60  9.637847 0.19794122 7.332749 0.4823624 0.03218830 0.3196055
## 61     61  9.637085 0.19813070 7.333834 0.4843272 0.03216323 0.3196899
## 62     62  9.630105 0.19922757 7.328405 0.4841952 0.03209729 0.3200003
## 63     63  9.626028 0.19986614 7.325545 0.4861569 0.03228542 0.3192122
## 64     64  9.624632 0.20009907 7.326365 0.4819237 0.03189702 0.3192949
## 65     65  9.623066 0.20034085 7.326416 0.4794706 0.03162154 0.3150573
## 66     66  9.623583 0.20025638 7.326371 0.4812940 0.03196354 0.3174405
## 67     67  9.628533 0.19951967 7.328708 0.4777532 0.03161616 0.3146589
## 68     68  9.626558 0.19982993 7.325933 0.4768326 0.03135079 0.3122173
## 69     69  9.627609 0.19967295 7.327638 0.4735192 0.03068204 0.3107170
## 70     70  9.628046 0.19963568 7.330722 0.4713702 0.03026589 0.3081748
## 71     71  9.633440 0.19884876 7.334766 0.4744917 0.03083888 0.3079760
## 72     72  9.630207 0.19935318 7.333459 0.4730032 0.03053585 0.3072335
## 73     73  9.634320 0.19866301 7.334429 0.4738796 0.03075226 0.3124372
## 74     74  9.632606 0.19896334 7.332290 0.4697531 0.03019562 0.3109295
## 75     75  9.633333 0.19887884 7.330988 0.4707002 0.03040032 0.3122666
## 76     76  9.637270 0.19839607 7.334204 0.4705131 0.03069367 0.3123891
## 77     77  9.637330 0.19842028 7.335480 0.4730939 0.03101370 0.3140298
## 78     78  9.637987 0.19834693 7.337129 0.4754113 0.03124856 0.3156944
## 79     79  9.634839 0.19888245 7.335009 0.4794442 0.03168884 0.3197409
## 80     80  9.634369 0.19898613 7.332618 0.4806072 0.03205410 0.3207875
## 81     81  9.635268 0.19887677 7.333434 0.4822420 0.03223080 0.3210465
## 82     82  9.640854 0.19809241 7.336798 0.4808563 0.03197181 0.3209463
## 83     83  9.642530 0.19785495 7.338418 0.4826313 0.03217259 0.3214175
## 84     84  9.645931 0.19737047 7.341670 0.4866171 0.03247379 0.3255516
## 85     85  9.643375 0.19777569 7.338913 0.4876278 0.03273128 0.3282442
## 86     86  9.642478 0.19794873 7.337956 0.4883664 0.03279266 0.3297883
## 87     87  9.644971 0.19762677 7.342382 0.4886839 0.03272002 0.3280370
## 88     88  9.643955 0.19779005 7.340143 0.4888782 0.03300201 0.3268896
## 89     89  9.646381 0.19748613 7.342141 0.4911933 0.03337987 0.3272014
## 90     90  9.644974 0.19770212 7.339921 0.4900193 0.03324822 0.3252611
## 91     91  9.645644 0.19759609 7.338008 0.4896470 0.03336991 0.3244592
## 92     92  9.643941 0.19784867 7.335431 0.4887986 0.03327433 0.3253143
## 93     93  9.642457 0.19808291 7.333022 0.4909757 0.03339273 0.3271385
## 94     94  9.638946 0.19861517 7.331746 0.4894207 0.03318396 0.3253670
## 95     95  9.637986 0.19871240 7.332079 0.4882037 0.03298901 0.3225894
## 96     96  9.641738 0.19818132 7.335622 0.4891497 0.03311914 0.3255611
## 97     97  9.640693 0.19830926 7.335751 0.4871465 0.03269158 0.3249365
## 98     98  9.638406 0.19861060 7.335546 0.4859267 0.03260109 0.3247582
## 99     99  9.638292 0.19860432 7.335371 0.4835742 0.03239127 0.3241332
## 100   100  9.637979 0.19871634 7.335049 0.4867231 0.03277228 0.3283375
## 101   101  9.637922 0.19875072 7.335148 0.4869846 0.03279377 0.3262044
## 102   102  9.639737 0.19847845 7.334673 0.4869977 0.03270639 0.3269458
## 103   103  9.636989 0.19891360 7.333854 0.4888500 0.03280641 0.3269784
## 104   104  9.635866 0.19910494 7.331034 0.4877309 0.03262011 0.3250461
## 105   105  9.634853 0.19925137 7.329637 0.4881192 0.03278720 0.3267913
## 106   106  9.633075 0.19954496 7.328999 0.4862240 0.03268191 0.3245915
## 107   107  9.631798 0.19974523 7.327250 0.4869244 0.03296552 0.3245389
## 108   108  9.632383 0.19971464 7.327464 0.4914903 0.03359949 0.3269074
## 109   109  9.633354 0.19957533 7.328699 0.4929301 0.03368659 0.3274432
## 110   110  9.634164 0.19946059 7.330328 0.4929124 0.03365076 0.3280303
## 111   111  9.635282 0.19932179 7.330047 0.4923225 0.03373661 0.3272854
## 112   112  9.635311 0.19932662 7.330227 0.4921802 0.03375941 0.3269019
## 113   113  9.636710 0.19913524 7.331110 0.4930719 0.03385221 0.3268784
## 114   114  9.637493 0.19901546 7.331498 0.4933230 0.03381400 0.3261352
## 115   115  9.637400 0.19899686 7.331122 0.4938071 0.03378569 0.3256036
## 116   116  9.636425 0.19913059 7.331039 0.4933183 0.03373932 0.3262363
## 117   117  9.636952 0.19906629 7.332437 0.4932250 0.03373071 0.3259437
## 118   118  9.634861 0.19936600 7.332259 0.4949759 0.03405248 0.3269493
## 119   119  9.634948 0.19935903 7.332872 0.4942967 0.03411209 0.3263033
## 120   120  9.635280 0.19930563 7.332574 0.4928928 0.03393856 0.3254215
## 121   121  9.634920 0.19934770 7.332340 0.4926238 0.03384182 0.3254437
## 122   122  9.634375 0.19940941 7.330916 0.4912490 0.03366279 0.3238916
## 123   123  9.634742 0.19938107 7.332308 0.4924775 0.03384391 0.3246004
## 124   124  9.635311 0.19933604 7.331776 0.4924463 0.03393819 0.3245462
## 125   125  9.634750 0.19940906 7.330999 0.4917910 0.03403052 0.3241592
## 126   126  9.635770 0.19927646 7.330790 0.4916262 0.03401231 0.3234002
## 127   127  9.634268 0.19949013 7.329844 0.4932195 0.03419209 0.3241022
## 128   128  9.635959 0.19924004 7.330823 0.4937714 0.03421638 0.3246811
## 129   129  9.635138 0.19935852 7.330553 0.4939452 0.03415657 0.3254823
## 130   130  9.635817 0.19926592 7.331557 0.4943953 0.03422760 0.3268632
## 131   131  9.635713 0.19931451 7.332509 0.4954391 0.03444387 0.3280296
## 132   132  9.634573 0.19949616 7.331093 0.4966212 0.03468216 0.3283526
## 133   133  9.634159 0.19955455 7.330413 0.4957496 0.03465020 0.3280146
## 134   134  9.633833 0.19962523 7.330568 0.4961586 0.03467571 0.3279936
## 135   135  9.632697 0.19979297 7.330565 0.4944520 0.03447396 0.3265192
## 136   136  9.632797 0.19979215 7.330753 0.4940482 0.03443880 0.3264375
## 137   137  9.632161 0.19986596 7.330867 0.4933727 0.03435251 0.3253374
## 138   138  9.631808 0.19991432 7.331033 0.4934708 0.03437552 0.3256485
## 139   139  9.630757 0.20008335 7.330511 0.4936398 0.03442376 0.3255177
## 140   140  9.630653 0.20009826 7.330621 0.4930917 0.03429013 0.3255031
## 141   141  9.630782 0.20008063 7.330643 0.4920849 0.03415062 0.3245322
## 142   142  9.630717 0.20008648 7.330770 0.4922773 0.03410335 0.3243341
## 143   143  9.630938 0.20004469 7.330823 0.4925382 0.03410318 0.3248823
## 144   144  9.631273 0.19998616 7.331325 0.4931110 0.03408971 0.3251739
## 145   145  9.630944 0.20003653 7.330957 0.4930576 0.03407356 0.3250432
## 146   146  9.631400 0.19996883 7.331640 0.4925842 0.03399285 0.3240851
## 147   147  9.631958 0.19989725 7.331798 0.4928402 0.03403765 0.3239224
## 148   148  9.631653 0.19994064 7.331453 0.4931675 0.03406227 0.3242381
## 149   149  9.632159 0.19988227 7.331403 0.4929932 0.03407681 0.3241466
## 150   150  9.632447 0.19983882 7.331939 0.4928258 0.03402221 0.3241302
## 151   151  9.632196 0.19987376 7.331605 0.4930766 0.03398577 0.3242435
## 152   152  9.632032 0.19990266 7.331202 0.4933033 0.03403262 0.3242738
## 153   153  9.632308 0.19986587 7.331162 0.4935183 0.03404603 0.3245987
## 154   154  9.632400 0.19985752 7.331563 0.4933914 0.03403469 0.3246104
## 155   155  9.632458 0.19984612 7.331529 0.4936124 0.03404341 0.3247388
## 156   156  9.632461 0.19983831 7.331613 0.4933489 0.03399701 0.3242616
## 157   157  9.632222 0.19987341 7.331613 0.4936005 0.03404458 0.3244574
## 158   158  9.631946 0.19991440 7.331456 0.4936122 0.03404299 0.3245422
## 159   159  9.632034 0.19990167 7.331550 0.4934646 0.03404531 0.3245381
## 160   160  9.631878 0.19992469 7.331536 0.4934892 0.03404493 0.3244512
## 161   161  9.631809 0.19993444 7.331368 0.4937166 0.03408048 0.3245996
## 162   162  9.631782 0.19993549 7.331476 0.4936692 0.03406819 0.3244681
## 163   163  9.631765 0.19993663 7.331428 0.4936124 0.03405973 0.3244335
## 164   164  9.631757 0.19993708 7.331435 0.4936255 0.03405957 0.3244178
## [1] "Best Model"
##    nvmax
## 25    25

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                 Estimate        2.5 %        97.5 %
## (Intercept) 125.53752612 125.28664016 125.788412094
## PC1          -0.13174378  -0.15360057  -0.109886988
## PC2          -0.26265119  -0.28462520  -0.240677171
## PC3          -0.12392626  -0.14608133  -0.101771190
## PC4          -0.09605243  -0.11860394  -0.073500921
## PC5           0.05232279   0.02908298   0.075562607
## PC6          -0.03151566  -0.05479069  -0.008240638
## PC7          -0.05104896  -0.07490210  -0.027195809
## PC11         -0.14809027  -0.17505597  -0.121124574
## PC12         -0.14995002  -0.17870177  -0.121198282
## PC13          0.08936427   0.06033631   0.118392224
## PC14          0.07883159   0.04874545   0.108917730
## PC16          0.10772954   0.07664863   0.138810453
## PC17         -0.06539942  -0.09798185  -0.032816992
## PC18         -0.11065770  -0.14487675  -0.076438656
## PC20          0.12389080   0.08623812   0.161543488
## PC24         -0.20717929  -0.29591195  -0.118446630
## PC32         -0.22568123  -0.35114761  -0.100214855
## PC34          0.34242571   0.20706931   0.477782106
## PC85          0.34636392   0.13872842   0.553999423
## PC87          0.45277268   0.24235715   0.663188204
## PC115        -0.41521988  -0.63775861  -0.192681152
## PC131        -0.31671509  -0.54358089  -0.089849296
## PC144         0.37184913   0.13955224   0.604146014
## PC159         0.44155784   0.20640088   0.676714787
## PC162        -0.38798731  -0.62411302  -0.151861607

Test

# Evaluate the backward-elimination (leapBackward) model on the test
# set; prints prediction summary and test MSE/RMSE (model scale and
# original scale). Arguments are now named (model =, test =) to match
# the forward-selection test call elsewhere in this report.
# isTRUE() avoids an error if the param is NA/NULL.
# NOTE(review): confirm `t` is the intended transformation object,
# not base::t.
if (isTRUE(algo.backward.caret)) {
  test.model(model = model.backward, test = data.test
             ,method = 'leapBackward',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   107.2   122.9   126.5   125.7   129.3   135.8 
## [1] "leapBackward  Test MSE: 93.7076027896315"
## [1] "leapBackward  Test RMSE: 9.68026873540355"
## [1] "leapBackward  Test MSE (Org Scale): 93.7076027896315"
## [1] "leapBackward  Test RMSE (Org Scale): 9.68026873540355"

Stepwise Selection with CV

Train

# Train a stepwise-selection model (caret method "leapSeq") with
# cross-validation via the project helper train.caret.glmselect().
# Keeps the fitted model and run id for the subsequent test chunk.
# set.seed(1) reproduces the same CV folds used by the other
# selection strategies, keeping their RMSE tables comparable.
if (isTRUE(algo.stepwise.caret)) {
  set.seed(1)
  returned <- train.caret.glmselect(formula = formula
                                    ,data =  data.train
                                    ,method = "leapSeq"
                                    ,feature.names = feature.names)
  model.stepwise <- returned$model
  id <- returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 35 on full training set
## [1] "All models results"
##     nvmax      RMSE   Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1       1 10.314398 0.08054327 7.884919 0.4271001 0.02709031 0.2439494
## 2       2 10.199048 0.10109480 7.798425 0.4900334 0.03484458 0.3168454
## 3       3 10.125736 0.11377356 7.728769 0.4980288 0.03380131 0.3271481
## 4       4 10.035485 0.12975376 7.639849 0.5046685 0.03739921 0.3292890
## 5       5  9.932844 0.14670423 7.558306 0.5039755 0.03809777 0.3114495
## 6       6 10.030405 0.13051774 7.635995 0.5076900 0.03591819 0.3354414
## 7       7  9.854531 0.15994439 7.481765 0.5170148 0.03960708 0.3305379
## 8       8  9.824284 0.16494390 7.471437 0.5130926 0.03802914 0.3243163
## 9       9  9.797375 0.16960724 7.451693 0.5057431 0.03806398 0.3244449
## 10     10  9.747838 0.17762966 7.416007 0.5002890 0.03803179 0.3205494
## 11     11  9.740183 0.17898362 7.414736 0.4974425 0.03697085 0.3144536
## 12     12  9.767298 0.17484789 7.434547 0.5281442 0.04312871 0.3371246
## 13     13  9.723374 0.18241267 7.396719 0.4935489 0.04058802 0.3203828
## 14     14  9.723740 0.18230840 7.395550 0.4649143 0.03431618 0.2982257
## 15     15  9.731958 0.18077727 7.407222 0.5078310 0.04139178 0.3310715
## 16     16  9.709285 0.18458215 7.376846 0.5096415 0.03979359 0.3281043
## 17     17  9.676890 0.19000806 7.351709 0.4866735 0.03635214 0.3012762
## 18     18  9.653421 0.19382813 7.336945 0.4819013 0.03653255 0.3013920
## 19     19  9.648811 0.19460511 7.337869 0.4809606 0.03641921 0.2974860
## 20     20  9.640805 0.19584349 7.334312 0.4725456 0.03542158 0.2918549
## 21     21  9.631106 0.19750881 7.326202 0.4619025 0.03464146 0.2877406
## 22     22  9.629274 0.19782504 7.319748 0.4606926 0.03346942 0.2898984
## 23     23  9.619234 0.19946334 7.315399 0.4740728 0.03426493 0.2923705
## 24     24  9.628839 0.19794973 7.316840 0.4829603 0.03485251 0.2975578
## 25     25  9.618917 0.19966723 7.312588 0.4716647 0.03449186 0.2962706
## 26     26  9.632674 0.19743285 7.327755 0.4799379 0.03477068 0.3097468
## 27     27  9.632822 0.19751978 7.328972 0.4687156 0.03259117 0.2848668
## 28     28  9.627234 0.19837556 7.318306 0.4690775 0.03282265 0.2991245
## 29     29  9.633097 0.19742420 7.326575 0.4703787 0.03321850 0.3017409
## 30     30  9.627875 0.19814296 7.331764 0.4685781 0.03308127 0.3034400
## 31     31  9.639787 0.19631180 7.332932 0.4549729 0.03218927 0.2876044
## 32     32  9.629992 0.19810176 7.325367 0.4762382 0.03409035 0.3167192
## 33     33  9.624376 0.19933192 7.329465 0.4849616 0.03577901 0.3188442
## 34     34  9.624356 0.19903069 7.326110 0.4812036 0.03399530 0.3229024
## 35     35  9.613278 0.20055832 7.322700 0.4770978 0.03339960 0.3192462
## 36     36  9.629847 0.19795599 7.320346 0.4747992 0.03336160 0.3245736
## 37     37  9.634632 0.19734518 7.324639 0.4832365 0.03299993 0.3266106
## 38     38  9.632323 0.19770234 7.322009 0.4765628 0.03174522 0.3144365
## 39     39  9.636866 0.19709061 7.326940 0.4816608 0.03217041 0.3176460
## 40     40  9.637624 0.19706216 7.331662 0.4801166 0.03246926 0.3177133
## 41     41  9.637170 0.19726070 7.332455 0.4827652 0.03291564 0.3239923
## 42     42  9.631121 0.19815246 7.326538 0.4820721 0.03236611 0.3196006
## 43     43  9.644048 0.19625822 7.342010 0.4770835 0.03214499 0.3165599
## 44     44  9.653638 0.19469807 7.345508 0.4730572 0.03215679 0.3132738
## 45     45  9.644565 0.19626995 7.344135 0.4816156 0.03239358 0.3153108
## 46     46  9.647166 0.19597772 7.342676 0.4817311 0.03217179 0.3172612
## 47     47  9.641219 0.19639501 7.344418 0.4793462 0.03190739 0.3177120
## 48     48  9.644091 0.19648993 7.349758 0.4798667 0.03192693 0.3172891
## 49     49  9.647452 0.19597088 7.352344 0.4821877 0.03178765 0.3145333
## 50     50  9.649302 0.19569868 7.350758 0.4810711 0.03144358 0.3152121
## 51     51  9.648766 0.19583917 7.349302 0.4895776 0.03208614 0.3188607
## 52     52  9.649036 0.19579624 7.348401 0.4842173 0.03145266 0.3148604
## 53     53  9.651952 0.19536610 7.350886 0.4797401 0.03088922 0.3149365
## 54     54  9.657675 0.19429043 7.351963 0.4699501 0.03105085 0.3140519
## 55     55  9.640991 0.19709526 7.343983 0.4848072 0.03164436 0.3190550
## 56     56  9.633323 0.19843090 7.335343 0.4863930 0.03189816 0.3210032
## 57     57  9.634674 0.19764783 7.333981 0.4812958 0.03234191 0.3089316
## 58     58  9.645476 0.19656866 7.333898 0.4979858 0.03345372 0.3193452
## 59     59  9.638123 0.19806877 7.341765 0.4839277 0.03220295 0.3160701
## 60     60  9.640663 0.19750436 7.336194 0.4815449 0.03186693 0.3185537
## 61     61  9.632959 0.19853639 7.328617 0.4711072 0.03053782 0.3064884
## 62     62  9.650786 0.19575846 7.343055 0.4676786 0.03133202 0.3124863
## 63     63  9.626028 0.19986614 7.325545 0.4861569 0.03228542 0.3192122
## 64     64  9.619048 0.20125257 7.327606 0.4855103 0.03312232 0.3186253
## 65     65  9.622465 0.20042856 7.325254 0.4798265 0.03170182 0.3157264
## 66     66  9.623583 0.20025638 7.326371 0.4812940 0.03196354 0.3174405
## 67     67  9.625210 0.20027783 7.332145 0.4799278 0.03243753 0.3126892
## 68     68  9.627700 0.19968550 7.327059 0.4790401 0.03161636 0.3143854
## 69     69  9.627498 0.19970770 7.327376 0.4736241 0.03070443 0.3108537
## 70     70  9.627408 0.19973652 7.330858 0.4719680 0.03032860 0.3081055
## 71     71  9.633821 0.19879325 7.334645 0.4745140 0.03082342 0.3079312
## 72     72  9.633054 0.19840440 7.335435 0.4729647 0.03012478 0.3075016
## 73     73  9.633220 0.19885323 7.334183 0.4732130 0.03081773 0.3124056
## 74     74  9.632606 0.19896334 7.332290 0.4697531 0.03019562 0.3109295
## 75     75  9.643045 0.19722997 7.331824 0.4726842 0.03031813 0.3125013
## 76     76  9.637270 0.19839607 7.334204 0.4705131 0.03069367 0.3123891
## 77     77  9.651117 0.19605608 7.343962 0.4623608 0.03142971 0.3092341
## 78     78  9.637920 0.19836162 7.336733 0.4753775 0.03125471 0.3155869
## 79     79  9.634738 0.19903145 7.340632 0.4794420 0.03182018 0.3167133
## 80     80  9.628402 0.19961485 7.327466 0.4660926 0.03012935 0.3079969
## 81     81  9.634214 0.19915641 7.338179 0.4822087 0.03226571 0.3179727
## 82     82  9.644343 0.19763083 7.345891 0.4806156 0.03166716 0.3172938
## 83     83  9.642862 0.19781385 7.338282 0.4832640 0.03224512 0.3211627
## 84     84  9.646473 0.19735186 7.348114 0.4862948 0.03245701 0.3226072
## 85     85  9.644749 0.19756028 7.339795 0.4868332 0.03254305 0.3271164
## 86     86  9.646077 0.19729338 7.335464 0.4930609 0.03343510 0.3274167
## 87     87  9.645155 0.19762625 7.347180 0.4880537 0.03272303 0.3257065
## 88     88  9.639757 0.19826580 7.334796 0.4916190 0.03304397 0.3232741
## 89     89  9.646705 0.19744152 7.342021 0.4913420 0.03335946 0.3271849
## 90     90  9.660434 0.19516634 7.350607 0.4778613 0.03334719 0.3208404
## 91     91  9.643725 0.19789868 7.340807 0.4907398 0.03361582 0.3231447
## 92     92  9.643155 0.19779708 7.338427 0.4902939 0.03316932 0.3182344
## 93     93  9.642154 0.19812880 7.332556 0.4915675 0.03349869 0.3271316
## 94     94  9.638975 0.19861292 7.331924 0.4894331 0.03318281 0.3253844
## 95     95  9.642294 0.19778840 7.327382 0.4928829 0.03353639 0.3200677
## 96     96  9.638688 0.19829174 7.338738 0.4811441 0.03118662 0.3165684
## 97     97  9.641035 0.19809478 7.331646 0.4872268 0.03264268 0.3241087
## 98     98  9.637950 0.19867922 7.334649 0.4861633 0.03264987 0.3251743
## 99     99  9.638709 0.19854400 7.335793 0.4837175 0.03243174 0.3242451
## 100   100  9.647845 0.19707847 7.345731 0.4790327 0.03358016 0.3243687
## 101   101  9.634841 0.19905561 7.336650 0.4860725 0.03261444 0.3266205
## 102   102  9.644171 0.19777240 7.336937 0.4892410 0.03245244 0.3272985
## 103   103  9.637043 0.19888613 7.333817 0.4860516 0.03252267 0.3258162
## 104   104  9.644544 0.19759412 7.342168 0.4880178 0.03442113 0.3290760
## 105   105  9.648470 0.19700612 7.339022 0.4780763 0.03309997 0.3234181
## 106   106  9.626111 0.20025674 7.318176 0.4701244 0.03084190 0.3068369
## 107   107  9.641499 0.19812141 7.333168 0.4795677 0.03324143 0.3225631
## 108   108  9.625872 0.20075685 7.323937 0.4940935 0.03435193 0.3278834
## 109   109  9.634808 0.19935575 7.329201 0.4917019 0.03365578 0.3272076
## 110   110  9.636990 0.19899400 7.338645 0.4987768 0.03619888 0.3338182
## 111   111  9.640986 0.19793926 7.339657 0.4890052 0.03413145 0.3239878
## 112   112  9.644118 0.19791443 7.341766 0.4947524 0.03551866 0.3317083
## 113   113  9.637317 0.19904490 7.332003 0.4932715 0.03390863 0.3271163
## 114   114  9.637493 0.19901546 7.331498 0.4933230 0.03381400 0.3261352
## 115   115  9.647355 0.19750109 7.342614 0.4965370 0.03577645 0.3310894
## 116   116  9.628669 0.20016285 7.326723 0.4905149 0.03317235 0.3251674
## 117   117  9.636408 0.19914600 7.331753 0.4930449 0.03368017 0.3257613
## 118   118  9.634861 0.19936600 7.332259 0.4949759 0.03405248 0.3269493
## 119   119  9.630940 0.19980799 7.331596 0.4931298 0.03385856 0.3259838
## 120   120  9.635280 0.19930563 7.332574 0.4928928 0.03393856 0.3254215
## 121   121  9.634920 0.19934770 7.332340 0.4926238 0.03384182 0.3254437
## 122   122  9.634375 0.19940941 7.330916 0.4912490 0.03366279 0.3238916
## 123   123  9.634742 0.19938107 7.332308 0.4924775 0.03384391 0.3246004
## 124   124  9.632594 0.19954532 7.329751 0.4926203 0.03392131 0.3241588
## 125   125  9.634750 0.19940906 7.330999 0.4917910 0.03403052 0.3241592
## 126   126  9.631275 0.19999527 7.331469 0.4942490 0.03460944 0.3230693
## 127   127  9.626061 0.20042196 7.329419 0.5007588 0.03475107 0.3242564
## 128   128  9.635959 0.19924004 7.330823 0.4937714 0.03421638 0.3246811
## 129   129  9.631418 0.19947836 7.333752 0.4935846 0.03315973 0.3177274
## 130   130  9.636154 0.19905206 7.331498 0.4945305 0.03435957 0.3264199
## 131   131  9.642816 0.19820577 7.335905 0.5042671 0.03537116 0.3313577
## 132   132  9.629037 0.19996286 7.326142 0.4731106 0.03131379 0.3081658
## 133   133  9.625251 0.20068170 7.322689 0.4788724 0.03271940 0.3139728
## 134   134  9.633833 0.19962523 7.330568 0.4961586 0.03467571 0.3279936
## 135   135  9.632697 0.19979297 7.330565 0.4944520 0.03447396 0.3265192
## 136   136  9.632797 0.19979215 7.330753 0.4940482 0.03443880 0.3264375
## 137   137  9.632161 0.19986596 7.330867 0.4933727 0.03435251 0.3253374
## 138   138  9.621678 0.20118880 7.327529 0.5028781 0.03517881 0.3270326
## 139   139  9.630757 0.20008335 7.330511 0.4936398 0.03442376 0.3255177
## 140   140  9.630653 0.20009826 7.330621 0.4930917 0.03429013 0.3255031
## 141   141  9.630782 0.20008063 7.330643 0.4920849 0.03415062 0.3245322
## 142   142  9.642788 0.19791308 7.339045 0.4868270 0.03432553 0.3228355
## 143   143  9.630938 0.20004469 7.330823 0.4925382 0.03410318 0.3248823
## 144   144  9.631273 0.19998616 7.331325 0.4931110 0.03408971 0.3251739
## 145   145  9.626272 0.20052867 7.326845 0.4804015 0.03230230 0.3110429
## 146   146  9.631400 0.19996883 7.331640 0.4925842 0.03399285 0.3240851
## 147   147  9.631958 0.19989725 7.331798 0.4928402 0.03403765 0.3239224
## 148   148  9.633109 0.19971418 7.334493 0.4937935 0.03394853 0.3244833
## 149   149  9.627775 0.20031032 7.327099 0.5091607 0.03566012 0.3294421
## 150   150  9.632447 0.19983882 7.331939 0.4928258 0.03402221 0.3241302
## 151   151  9.634510 0.19951189 7.334595 0.4940863 0.03380899 0.3244866
## 152   152  9.632032 0.19990266 7.331202 0.4933033 0.03403262 0.3242738
## 153   153  9.634018 0.19960311 7.333678 0.4942584 0.03391589 0.3247875
## 154   154  9.627332 0.20050814 7.328804 0.4979133 0.03440849 0.3257559
## 155   155  9.627752 0.20033229 7.326627 0.4845444 0.03316572 0.3156675
## 156   156  9.632461 0.19983831 7.331613 0.4933489 0.03399701 0.3242616
## 157   157  9.631847 0.19981085 7.336929 0.4953112 0.03454456 0.3258353
## 158   158  9.632576 0.19969635 7.332822 0.4948467 0.03445118 0.3271549
## 159   159  9.637735 0.19902471 7.336996 0.4890297 0.03405738 0.3226827
## 160   160  9.631878 0.19992469 7.331536 0.4934892 0.03404493 0.3244512
## 161   161  9.632377 0.19984984 7.331586 0.4938375 0.03417048 0.3246630
## 162   162  9.636927 0.19921567 7.335198 0.4917200 0.03433715 0.3243667
## 163   163  9.630493 0.20013581 7.329566 0.4939151 0.03416016 0.3244173
## 164   164  9.631757 0.19993708 7.331435 0.4936255 0.03405957 0.3244178
## [1] "Best Model"
##    nvmax
## 35    35

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                 Estimate        2.5 %        97.5 %
## (Intercept) 125.54332383 125.29362862 125.793019041
## PC1          -0.13107191  -0.15282709  -0.109316740
## PC2          -0.26143362  -0.28330665  -0.239560590
## PC3          -0.12353972  -0.14559136  -0.101488091
## PC4          -0.09593267  -0.11837892  -0.073486433
## PC5           0.05160143   0.02846952   0.074733329
## PC6          -0.03163802  -0.05480317  -0.008472871
## PC7          -0.04906467  -0.07281142  -0.025317927
## PC11         -0.14774898  -0.17458577  -0.120912196
## PC12         -0.14961413  -0.17822848  -0.120999770
## PC13          0.08898327   0.06009396   0.117872582
## PC14          0.08014441   0.05019583   0.110092985
## PC16          0.10817657   0.07724294   0.139110211
## PC17         -0.06639171  -0.09881922  -0.033964189
## PC18         -0.10980108  -0.14385685  -0.075745315
## PC20          0.12353678   0.08606610   0.161007456
## PC24         -0.20854604  -0.29687258  -0.120219492
## PC29          0.15284133   0.03938205   0.266300606
## PC32         -0.22568379  -0.35057007  -0.100797519
## PC34          0.34476425   0.21004354   0.479484960
## PC59          0.25147217   0.06760204   0.435342306
## PC64         -0.23383042  -0.42233991  -0.045320920
## PC68          0.25728527   0.06318426   0.451386288
## PC71          0.26255777   0.06858327   0.456532272
## PC83         -0.25700407  -0.46206759  -0.051940554
## PC85          0.35254319   0.14589681   0.559189573
## PC87          0.44709472   0.23767593   0.656513505
## PC106         0.30169008   0.08328181   0.520098344
## PC115        -0.41081054  -0.63229474  -0.189326348
## PC123        -0.25214503  -0.47416307  -0.030126988
## PC131        -0.30740473  -0.53322038  -0.081589075
## PC144         0.37345939   0.14227856   0.604640227
## PC146         0.28367384   0.05218695   0.515160736
## PC159         0.45444257   0.22038775   0.688497398
## PC162        -0.39209913  -0.62710258  -0.157095672
## PC163         0.32150076   0.08577379   0.557227726

Test

# Evaluate the stepwise-selection (leapSeq) model on the held-out test set.
# Prints a summary of predictions plus MSE/RMSE on both the transformed and
# original scales, and draws prediction-limit plots.
# NOTE(review): `transformation = t` passes base R's transpose function unless
# a variable `t` was defined earlier in the file — confirm against earlier chunks.
if (algo.stepwise.caret) {
  test.model(model.stepwise, data.test
             ,method = 'leapSeq',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
  
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   106.0   122.9   126.4   125.8   129.3   136.5 
## [1] "leapSeq  Test MSE: 94.1366795585845"
## [1] "leapSeq  Test RMSE: 9.70240586445365"
## [1] "leapSeq  Test MSE (Org Scale): 94.1366795585845"
## [1] "leapSeq  Test RMSE (Org Scale): 9.70240586445365"

LASSO with CV

Train

# Train a LASSO model via caret's glmnet interface.
# alpha = 1 selects the pure-LASSO penalty; lambda is searched over a
# log-spaced grid of 100 values in [1e-4, 1e-2]. The seed makes the CV fold
# assignment reproducible across runs.
if (algo.LASSO.caret) {
  set.seed(1)
  tune.grid= expand.grid(alpha = 1,lambda = 10^seq(from=-4,to=-2,length=100))
  returned = train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "glmnet"
                                   ,subopt = 'LASSO'
                                   ,tune.grid = tune.grid
                                   ,feature.names = feature.names)
  model.LASSO.caret = returned$model
}
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 1, lambda = 0.01 on full training set
## glmnet 
## 
## 5584 samples
##  164 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE      Rsquared   MAE     
##   0.0001000000  9.628757  0.2001968  7.328930
##   0.0001047616  9.628757  0.2001968  7.328930
##   0.0001097499  9.628757  0.2001968  7.328930
##   0.0001149757  9.628757  0.2001968  7.328930
##   0.0001204504  9.628757  0.2001968  7.328930
##   0.0001261857  9.628757  0.2001968  7.328930
##   0.0001321941  9.628757  0.2001968  7.328930
##   0.0001384886  9.628757  0.2001968  7.328930
##   0.0001450829  9.628757  0.2001968  7.328930
##   0.0001519911  9.628757  0.2001968  7.328930
##   0.0001592283  9.628757  0.2001968  7.328930
##   0.0001668101  9.628757  0.2001968  7.328930
##   0.0001747528  9.628757  0.2001968  7.328930
##   0.0001830738  9.628757  0.2001968  7.328930
##   0.0001917910  9.628757  0.2001968  7.328930
##   0.0002009233  9.628757  0.2001968  7.328930
##   0.0002104904  9.628757  0.2001968  7.328930
##   0.0002205131  9.628757  0.2001968  7.328930
##   0.0002310130  9.628757  0.2001968  7.328930
##   0.0002420128  9.628757  0.2001968  7.328930
##   0.0002535364  9.628757  0.2001968  7.328930
##   0.0002656088  9.628757  0.2001968  7.328930
##   0.0002782559  9.628757  0.2001968  7.328930
##   0.0002915053  9.628757  0.2001968  7.328930
##   0.0003053856  9.628757  0.2001968  7.328930
##   0.0003199267  9.628757  0.2001968  7.328930
##   0.0003351603  9.628757  0.2001968  7.328930
##   0.0003511192  9.628757  0.2001968  7.328930
##   0.0003678380  9.628757  0.2001968  7.328930
##   0.0003853529  9.628757  0.2001968  7.328930
##   0.0004037017  9.628757  0.2001968  7.328930
##   0.0004229243  9.628757  0.2001968  7.328930
##   0.0004430621  9.628757  0.2001968  7.328930
##   0.0004641589  9.628757  0.2001968  7.328930
##   0.0004862602  9.628757  0.2001968  7.328930
##   0.0005094138  9.628757  0.2001968  7.328930
##   0.0005336699  9.628757  0.2001968  7.328930
##   0.0005590810  9.628757  0.2001968  7.328930
##   0.0005857021  9.628757  0.2001968  7.328930
##   0.0006135907  9.628757  0.2001968  7.328930
##   0.0006428073  9.628757  0.2001968  7.328930
##   0.0006734151  9.628757  0.2001968  7.328930
##   0.0007054802  9.628757  0.2001968  7.328930
##   0.0007390722  9.628757  0.2001968  7.328930
##   0.0007742637  9.628757  0.2001968  7.328930
##   0.0008111308  9.628757  0.2001968  7.328930
##   0.0008497534  9.628757  0.2001968  7.328930
##   0.0008902151  9.628757  0.2001968  7.328930
##   0.0009326033  9.628757  0.2001968  7.328930
##   0.0009770100  9.628757  0.2001968  7.328930
##   0.0010235310  9.628757  0.2001968  7.328930
##   0.0010722672  9.628757  0.2001968  7.328930
##   0.0011233240  9.628757  0.2001968  7.328930
##   0.0011768120  9.628757  0.2001968  7.328930
##   0.0012328467  9.628757  0.2001968  7.328930
##   0.0012915497  9.628757  0.2001968  7.328930
##   0.0013530478  9.628757  0.2001968  7.328930
##   0.0014174742  9.628757  0.2001968  7.328930
##   0.0014849683  9.628757  0.2001968  7.328930
##   0.0015556761  9.628757  0.2001968  7.328930
##   0.0016297508  9.628757  0.2001968  7.328930
##   0.0017073526  9.628757  0.2001968  7.328930
##   0.0017886495  9.628757  0.2001968  7.328930
##   0.0018738174  9.628757  0.2001968  7.328930
##   0.0019630407  9.628757  0.2001968  7.328930
##   0.0020565123  9.628757  0.2001968  7.328930
##   0.0021544347  9.628757  0.2001968  7.328930
##   0.0022570197  9.628757  0.2001968  7.328930
##   0.0023644894  9.628757  0.2001968  7.328930
##   0.0024770764  9.628757  0.2001968  7.328930
##   0.0025950242  9.628757  0.2001968  7.328930
##   0.0027185882  9.628757  0.2001968  7.328930
##   0.0028480359  9.628693  0.2002031  7.328875
##   0.0029836472  9.628553  0.2002156  7.328759
##   0.0031257158  9.628403  0.2002287  7.328635
##   0.0032745492  9.628247  0.2002424  7.328504
##   0.0034304693  9.628085  0.2002565  7.328369
##   0.0035938137  9.627915  0.2002713  7.328227
##   0.0037649358  9.627738  0.2002868  7.328079
##   0.0039442061  9.627553  0.2003030  7.327924
##   0.0041320124  9.627360  0.2003198  7.327763
##   0.0043287613  9.627160  0.2003372  7.327594
##   0.0045348785  9.626950  0.2003556  7.327418
##   0.0047508102  9.626729  0.2003750  7.327232
##   0.0049770236  9.626498  0.2003953  7.327037
##   0.0052140083  9.626256  0.2004167  7.326834
##   0.0054622772  9.626004  0.2004391  7.326621
##   0.0057223677  9.625741  0.2004623  7.326399
##   0.0059948425  9.625467  0.2004865  7.326172
##   0.0062802914  9.625182  0.2005117  7.325936
##   0.0065793322  9.624884  0.2005381  7.325693
##   0.0068926121  9.624575  0.2005655  7.325444
##   0.0072208090  9.624251  0.2005943  7.325182
##   0.0075646333  9.623915  0.2006242  7.324914
##   0.0079248290  9.623564  0.2006556  7.324634
##   0.0083021757  9.623200  0.2006881  7.324345
##   0.0086974900  9.622821  0.2007222  7.324043
##   0.0091116276  9.622428  0.2007573  7.323732
##   0.0095454846  9.622019  0.2007940  7.323405
##   0.0100000000  9.621592  0.2008325  7.323063
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.01.

##     alpha lambda
## 100     1   0.01
##     alpha       lambda     RMSE  Rsquared      MAE    RMSESD RsquaredSD     MAESD
## 1       1 0.0001000000 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 2       1 0.0001047616 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 3       1 0.0001097499 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 4       1 0.0001149757 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 5       1 0.0001204504 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 6       1 0.0001261857 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 7       1 0.0001321941 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 8       1 0.0001384886 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 9       1 0.0001450829 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 10      1 0.0001519911 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 11      1 0.0001592283 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 12      1 0.0001668101 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 13      1 0.0001747528 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 14      1 0.0001830738 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 15      1 0.0001917910 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 16      1 0.0002009233 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 17      1 0.0002104904 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 18      1 0.0002205131 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 19      1 0.0002310130 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 20      1 0.0002420128 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 21      1 0.0002535364 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 22      1 0.0002656088 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 23      1 0.0002782559 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 24      1 0.0002915053 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 25      1 0.0003053856 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 26      1 0.0003199267 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 27      1 0.0003351603 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 28      1 0.0003511192 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 29      1 0.0003678380 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 30      1 0.0003853529 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 31      1 0.0004037017 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 32      1 0.0004229243 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 33      1 0.0004430621 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 34      1 0.0004641589 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 35      1 0.0004862602 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 36      1 0.0005094138 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 37      1 0.0005336699 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 38      1 0.0005590810 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 39      1 0.0005857021 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 40      1 0.0006135907 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 41      1 0.0006428073 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 42      1 0.0006734151 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 43      1 0.0007054802 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 44      1 0.0007390722 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 45      1 0.0007742637 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 46      1 0.0008111308 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 47      1 0.0008497534 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 48      1 0.0008902151 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 49      1 0.0009326033 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 50      1 0.0009770100 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 51      1 0.0010235310 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 52      1 0.0010722672 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 53      1 0.0011233240 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 54      1 0.0011768120 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 55      1 0.0012328467 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 56      1 0.0012915497 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 57      1 0.0013530478 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 58      1 0.0014174742 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 59      1 0.0014849683 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 60      1 0.0015556761 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 61      1 0.0016297508 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 62      1 0.0017073526 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 63      1 0.0017886495 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 64      1 0.0018738174 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 65      1 0.0019630407 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 66      1 0.0020565123 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 67      1 0.0021544347 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 68      1 0.0022570197 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 69      1 0.0023644894 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 70      1 0.0024770764 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 71      1 0.0025950242 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 72      1 0.0027185882 9.628757 0.2001968 7.328930 0.4933120 0.03406362 0.3244091
## 73      1 0.0028480359 9.628693 0.2002031 7.328875 0.4933233 0.03406540 0.3244276
## 74      1 0.0029836472 9.628553 0.2002156 7.328759 0.4933087 0.03406549 0.3244242
## 75      1 0.0031257158 9.628403 0.2002287 7.328635 0.4932924 0.03406551 0.3244197
## 76      1 0.0032745492 9.628247 0.2002424 7.328504 0.4932744 0.03406542 0.3244144
## 77      1 0.0034304693 9.628085 0.2002565 7.328369 0.4932573 0.03406547 0.3244099
## 78      1 0.0035938137 9.627915 0.2002713 7.328227 0.4932391 0.03406544 0.3244062
## 79      1 0.0037649358 9.627738 0.2002868 7.328079 0.4932205 0.03406546 0.3244025
## 80      1 0.0039442061 9.627553 0.2003030 7.327924 0.4932028 0.03406567 0.3243998
## 81      1 0.0041320124 9.627360 0.2003198 7.327763 0.4931839 0.03406580 0.3243975
## 82      1 0.0043287613 9.627160 0.2003372 7.327594 0.4931651 0.03406603 0.3243958
## 83      1 0.0045348785 9.626950 0.2003556 7.327418 0.4931446 0.03406625 0.3243935
## 84      1 0.0047508102 9.626729 0.2003750 7.327232 0.4931225 0.03406655 0.3243908
## 85      1 0.0049770236 9.626498 0.2003953 7.327037 0.4930993 0.03406691 0.3243872
## 86      1 0.0052140083 9.626256 0.2004167 7.326834 0.4930751 0.03406735 0.3243795
## 87      1 0.0054622772 9.626004 0.2004391 7.326621 0.4930500 0.03406782 0.3243713
## 88      1 0.0057223677 9.625741 0.2004623 7.326399 0.4930236 0.03406829 0.3243602
## 89      1 0.0059948425 9.625467 0.2004865 7.326172 0.4929945 0.03406850 0.3243407
## 90      1 0.0062802914 9.625182 0.2005117 7.325936 0.4929638 0.03406869 0.3243219
## 91      1 0.0065793322 9.624884 0.2005381 7.325693 0.4929281 0.03406849 0.3242989
## 92      1 0.0068926121 9.624575 0.2005655 7.325444 0.4928909 0.03406845 0.3242754
## 93      1 0.0072208090 9.624251 0.2005943 7.325182 0.4928511 0.03406846 0.3242504
## 94      1 0.0075646333 9.623915 0.2006242 7.324914 0.4928092 0.03406875 0.3242255
## 95      1 0.0079248290 9.623564 0.2006556 7.324634 0.4927654 0.03406918 0.3241992
## 96      1 0.0083021757 9.623200 0.2006881 7.324345 0.4927197 0.03406987 0.3241664
## 97      1 0.0086974900 9.622821 0.2007222 7.324043 0.4926739 0.03407112 0.3241289
## 98      1 0.0091116276 9.622428 0.2007573 7.323732 0.4926238 0.03407250 0.3240907
## 99      1 0.0095454846 9.622019 0.2007940 7.323405 0.4925726 0.03407415 0.3240515
## 100     1 0.0100000000 9.621592 0.2008325 7.323063 0.4925203 0.03407606 0.3240121

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##                model.coef
## (Intercept)  1.255514e+02
## PC1         -1.298723e-01
## PC2         -2.600514e-01
## PC3         -1.234301e-01
## PC4         -9.568324e-02
## PC5          5.082205e-02
## PC6         -3.161405e-02
## PC7         -4.871874e-02
## PC8         -1.182496e-02
## PC9         -1.231822e-02
## PC10        -5.376536e-03
## PC11        -1.452548e-01
## PC12        -1.476409e-01
## PC13         8.763595e-02
## PC14         7.771212e-02
## PC15        -5.684620e-03
## PC16         1.060543e-01
## PC17        -6.495879e-02
## PC18        -1.100534e-01
## PC19         1.238598e-02
## PC20         1.216020e-01
## PC21         2.235409e-02
## PC22         3.451018e-02
## PC23         6.763238e-02
## PC24        -2.108389e-01
## PC25         2.832458e-02
## PC26         9.670610e-02
## PC27         9.073414e-02
## PC28         3.915594e-02
## PC29         1.469626e-01
## PC30         1.525033e-02
## PC31        -5.692113e-02
## PC32        -2.210111e-01
## PC33         7.002469e-02
## PC34         3.433023e-01
## PC36        -7.127674e-03
## PC37        -1.162535e-01
## PC38        -8.295870e-05
## PC39        -4.647233e-02
## PC40        -1.028411e-01
## PC41         2.063495e-02
## PC42        -4.918385e-02
## PC44         2.320489e-02
## PC45        -1.070338e-02
## PC46         1.164440e-01
## PC47        -1.225217e-01
## PC48         1.860308e-02
## PC49        -1.655462e-02
## PC50        -8.590795e-02
## PC51         1.719985e-02
## PC52        -1.654400e-02
## PC53         4.646695e-02
## PC54        -4.027648e-02
## PC55         1.946765e-02
## PC57        -1.514104e-01
## PC58        -2.042308e-02
## PC59         2.421222e-01
## PC60        -9.946129e-02
## PC61         9.710888e-02
## PC62        -1.149038e-01
## PC63        -1.013142e-01
## PC64        -2.263178e-01
## PC65        -3.519116e-02
## PC66        -1.320295e-01
## PC67        -2.937963e-02
## PC68         2.631703e-01
## PC69         1.077511e-01
## PC70        -1.171616e-02
## PC71         2.494247e-01
## PC72        -5.314494e-04
## PC73         5.268753e-02
## PC74        -9.890600e-02
## PC75        -1.938554e-01
## PC76         9.764328e-03
## PC77         1.542149e-01
## PC78         4.919469e-02
## PC79         1.109009e-01
## PC80        -8.258235e-02
## PC81         2.061036e-01
## PC82         1.013928e-01
## PC83        -2.413708e-01
## PC84         2.100872e-01
## PC85         3.434338e-01
## PC86        -7.652314e-02
## PC87         4.457219e-01
## PC88        -2.006078e-01
## PC89        -1.901147e-01
## PC90        -1.805138e-01
## PC91         5.984314e-02
## PC92         3.416878e-02
## PC93        -6.015802e-03
## PC94        -2.106125e-01
## PC95         2.679723e-04
## PC96        -2.041438e-01
## PC97        -1.295453e-01
## PC98        -7.977836e-02
## PC99        -1.271480e-01
## PC101       -1.093273e-01
## PC102       -2.101682e-01
## PC103        1.053122e-01
## PC104       -1.506616e-01
## PC105        1.658028e-01
## PC106        2.843931e-01
## PC107       -4.107080e-02
## PC108        1.742891e-01
## PC109       -2.886080e-02
## PC110       -5.672525e-02
## PC111       -1.683311e-01
## PC112       -6.304220e-03
## PC113        9.302556e-02
## PC114       -1.413251e-01
## PC115       -4.034061e-01
## PC116       -4.770328e-02
## PC117       -1.455947e-02
## PC118        1.518460e-01
## PC119       -2.248756e-01
## PC120        6.098526e-02
## PC121       -1.001928e-01
## PC122        1.553259e-01
## PC123       -2.363524e-01
## PC124        1.354237e-01
## PC125        1.030857e-01
## PC126        1.181573e-01
## PC127        5.295171e-02
## PC128       -1.522229e-01
## PC129       -7.851309e-02
## PC130        8.734756e-02
## PC131       -2.966303e-01
## PC132        6.152004e-02
## PC133        4.252599e-02
## PC134        2.275716e-01
## PC135        1.918110e-01
## PC136        9.601589e-02
## PC137       -1.197720e-01
## PC138        1.383688e-01
## PC139       -2.054473e-01
## PC140       -4.580045e-02
## PC141        8.578545e-02
## PC142       -7.112606e-02
## PC143        7.781291e-02
## PC144        3.678276e-01
## PC145        6.121409e-02
## PC146        2.708227e-01
## PC147        3.133738e-02
## PC148       -6.746369e-02
## PC149        3.498798e-02
## PC150        6.728299e-02
## PC151        1.467252e-01
## PC152       -1.902990e-02
## PC153        1.387096e-01
## PC154       -1.781924e-01
## PC155        1.994517e-01
## PC156        2.442047e-01
## PC157        7.530545e-03
## PC158       -8.099376e-02
## PC159        4.502749e-01
## PC160       -1.459776e-02
## PC161        8.784971e-02
## PC162       -3.902729e-01
## PC163        3.040614e-01
## PC164        6.320154e-02

Test

# Evaluate the trained LASSO (glmnet) model on the held-out test set,
# printing prediction summaries and MSE/RMSE on both scales.
# NOTE(review): unlike the stepwise test call, this one does not pass
# `id = id` — verify whether test.model needs it here or falls back to a default.
if (algo.LASSO.caret) {
  test.model(model.LASSO.caret, data.test
             ,method = 'glmnet',subopt = "LASSO"
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   104.8   122.9   126.6   125.8   129.4   137.9 
## [1] "glmnet LASSO Test MSE: 92.7288734215879"
## [1] "glmnet LASSO Test RMSE: 9.62958324236246"
## [1] "glmnet LASSO Test MSE (Org Scale): 92.7288734215879"
## [1] "glmnet LASSO Test RMSE (Org Scale): 9.62958324236246"

LARS with CV

Train

# Train a Least Angle Regression (LARS) model via caret with the default
# tuning grid (caret tunes the `fraction` parameter itself).
# Fix: the original passed subopt = 'NULL' — the literal string "NULL" —
# where the sibling stepwise call passes the NULL object; a string "NULL"
# would be printed as a bogus sub-option label downstream.
if (algo.LARS.caret) {
  set.seed(1)
  returned = train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "lars"
                                   ,subopt = NULL
                                   ,feature.names = feature.names)
  model.LARS.caret = returned$model
}
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled
## performance measures.
## Aggregating results
## Selecting tuning parameters
## Fitting fraction = 0.707 on full training set
## Least Angle Regression 
## 
## 5584 samples
##  164 predictor
## 
## Pre-processing: centered (164), scaled (164) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   fraction    RMSE       Rsquared    MAE     
##   0.00000000  10.740673         NaN  8.176476
##   0.01010101  10.636310  0.08054327  8.103499
##   0.02020202  10.545888  0.08054327  8.041118
##   0.03030303  10.469772  0.08054327  7.988135
##   0.04040404  10.410668  0.08318423  7.945482
##   0.05050505  10.360653  0.09343877  7.908528
##   0.06060606  10.312276  0.10413069  7.871622
##   0.07070707  10.265454  0.11383566  7.835991
##   0.08080808  10.221609  0.12178841  7.802879
##   0.09090909  10.180980  0.12882159  7.771726
##   0.10101010  10.142173  0.13491040  7.741814
##   0.11111111  10.106561  0.13989973  7.713700
##   0.12121212  10.074123  0.14432258  7.687718
##   0.13131313  10.044176  0.14851574  7.663812
##   0.14141414  10.015478  0.15265112  7.641146
##   0.15151515   9.987705  0.15658075  7.619140
##   0.16161616   9.962041  0.16005314  7.599077
##   0.17171717   9.938053  0.16317864  7.580037
##   0.18181818   9.915559  0.16608790  7.562125
##   0.19191919   9.894247  0.16887172  7.545199
##   0.20202020   9.874727  0.17133655  7.529376
##   0.21212121   9.856493  0.17363442  7.514536
##   0.22222222   9.839380  0.17576509  7.500448
##   0.23232323   9.822725  0.17788673  7.486630
##   0.24242424   9.806367  0.18004642  7.473486
##   0.25252525   9.790549  0.18210545  7.460574
##   0.26262626   9.775439  0.18404661  7.448257
##   0.27272727   9.761376  0.18582268  7.437020
##   0.28282828   9.748209  0.18748135  7.426515
##   0.29292929   9.736044  0.18897239  7.416933
##   0.30303030   9.724749  0.19034319  7.407788
##   0.31313131   9.714093  0.19165262  7.399139
##   0.32323232   9.704186  0.19285852  7.391046
##   0.33333333   9.694926  0.19397291  7.383263
##   0.34343434   9.686171  0.19500941  7.375801
##   0.35353535   9.678382  0.19588306  7.369035
##   0.36363636   9.671380  0.19664406  7.362983
##   0.37373737   9.664654  0.19737882  7.357257
##   0.38383838   9.658404  0.19805303  7.352236
##   0.39393939   9.652684  0.19865287  7.347895
##   0.40404040   9.647387  0.19919329  7.344019
##   0.41414141   9.642813  0.19961933  7.340770
##   0.42424242   9.638565  0.20001217  7.337818
##   0.43434343   9.634259  0.20043548  7.334793
##   0.44444444   9.630151  0.20083544  7.331842
##   0.45454545   9.626363  0.20118963  7.329147
##   0.46464646   9.622692  0.20154942  7.326431
##   0.47474747   9.619097  0.20191315  7.323738
##   0.48484848   9.615833  0.20223151  7.321398
##   0.49494949   9.612989  0.20249066  7.319358
##   0.50505051   9.610319  0.20273424  7.317397
##   0.51515152   9.607776  0.20296984  7.315509
##   0.52525253   9.605494  0.20317113  7.313886
##   0.53535354   9.603457  0.20333877  7.312564
##   0.54545455   9.601584  0.20348674  7.311304
##   0.55555556   9.599904  0.20361467  7.310203
##   0.56565657   9.598498  0.20370590  7.309232
##   0.57575758   9.597138  0.20380285  7.308222
##   0.58585859   9.595968  0.20387619  7.307365
##   0.59595960   9.594833  0.20395177  7.306563
##   0.60606061   9.593641  0.20404658  7.305728
##   0.61616162   9.592602  0.20412510  7.304935
##   0.62626263   9.591700  0.20419041  7.304204
##   0.63636364   9.590913  0.20424497  7.303479
##   0.64646465   9.590324  0.20427233  7.302841
##   0.65656566   9.589767  0.20430324  7.302208
##   0.66666667   9.589301  0.20432666  7.301686
##   0.67676768   9.589016  0.20432785  7.301260
##   0.68686869   9.588813  0.20432343  7.300916
##   0.69696970   9.588635  0.20432239  7.300541
##   0.70707071   9.588583  0.20430746  7.300334
##   0.71717172   9.588719  0.20426937  7.300323
##   0.72727273   9.588941  0.20422417  7.300401
##   0.73737374   9.589245  0.20417370  7.300574
##   0.74747475   9.589757  0.20409730  7.300824
##   0.75757576   9.590414  0.20400461  7.301171
##   0.76767677   9.591188  0.20390007  7.301593
##   0.77777778   9.592073  0.20378511  7.302090
##   0.78787879   9.593083  0.20365805  7.302622
##   0.79797980   9.594175  0.20352592  7.303201
##   0.80808081   9.595316  0.20339283  7.303800
##   0.81818182   9.596514  0.20325919  7.304489
##   0.82828283   9.597811  0.20311739  7.305257
##   0.83838384   9.599157  0.20297441  7.306105
##   0.84848485   9.600494  0.20284028  7.306941
##   0.85858586   9.601934  0.20269657  7.307833
##   0.86868687   9.603439  0.20255020  7.308798
##   0.87878788   9.605089  0.20238865  7.309945
##   0.88888889   9.606864  0.20221548  7.311279
##   0.89898990   9.608678  0.20204289  7.312740
##   0.90909091   9.610610  0.20185842  7.314229
##   0.91919192   9.612649  0.20166456  7.315823
##   0.92929293   9.614788  0.20146225  7.317547
##   0.93939394   9.617034  0.20125067  7.319382
##   0.94949495   9.619315  0.20104033  7.321251
##   0.95959596   9.621656  0.20082686  7.323109
##   0.96969697   9.624064  0.20061096  7.325030
##   0.97979798   9.626558  0.20038991  7.327087
##   0.98989899   9.629118  0.20016628  7.329227
##   1.00000000   9.631757  0.19993708  7.331435
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.7070707.

##     fraction
## 71 0.7070707
## Warning: Removed 1 rows containing missing values (geom_point).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##          PC1          PC2          PC3          PC4          PC5          PC6          PC7          PC8          PC9 
## -1.424759281 -2.905573692 -1.321880264 -0.990975047  0.482156096 -0.264676646 -0.442113493 -0.050793492 -0.048547880 
##         PC11         PC12         PC13         PC14         PC16         PC17         PC18         PC19         PC20 
## -1.292131595 -1.219916838  0.687993659  0.578233980  0.784754329 -0.421434288 -0.729519522  0.024438093  0.741002594 
##         PC21         PC22         PC23         PC24         PC25         PC26         PC27         PC28         PC29 
##  0.065602954  0.060356027  0.155852581 -0.517885031  0.004413801  0.165073540  0.148242896  0.025712032  0.252636654 
##         PC31         PC32         PC33         PC34         PC37         PC39         PC40         PC46         PC47 
## -0.044144357 -0.370009311  0.075088775  0.562426246 -0.123734915 -0.003322229 -0.084965145  0.100640564 -0.114445165 
##         PC50         PC57         PC59         PC60         PC61         PC62         PC63         PC64         PC66 
## -0.044038810 -0.138447034  0.257711328 -0.061795131  0.066491263 -0.082098604 -0.069013829 -0.231286347 -0.093882228 
##         PC68         PC69         PC71         PC74         PC75         PC77         PC79         PC80         PC81 
##  0.255872831  0.068123233  0.252562286 -0.056262840 -0.176146927  0.120872462  0.065728965 -0.039002844  0.179173974 
##         PC82         PC83         PC84         PC85         PC86         PC87         PC88         PC89         PC90 
##  0.054773863 -0.225029993  0.181227827  0.340635496 -0.027685846  0.455764108 -0.172712684 -0.151924429 -0.151277663 
##         PC94         PC96         PC97         PC98         PC99        PC101        PC102        PC103        PC104 
## -0.173400126 -0.157380498 -0.082297416 -0.020826812 -0.084731908 -0.041873861 -0.168316912  0.046027897 -0.100638375 
##        PC105        PC106        PC108        PC111        PC113        PC114        PC115        PC118        PC119 
##  0.116658961  0.258231041  0.123870661 -0.123582279  0.035670689 -0.092657741 -0.387850355  0.098205841 -0.181671106 
##        PC121        PC122        PC123        PC124        PC125        PC126        PC128        PC129        PC130 
## -0.034667175  0.108940290 -0.203912501  0.078664212  0.044342978  0.061066321 -0.103950454 -0.011784781  0.029231367 
##        PC131        PC132        PC134        PC135        PC136        PC137        PC138        PC139        PC141 
## -0.260860418  0.001506585  0.188570163  0.134471399  0.027980488 -0.057560171  0.084411698 -0.153139584  0.022680935 
##        PC142        PC143        PC144        PC145        PC146        PC151        PC153        PC154        PC155 
## -0.011368996  0.012841400  0.328561321  0.004342598  0.224944337  0.082998239  0.077911199 -0.110589656  0.148156983 
##        PC156        PC158        PC159        PC161        PC162        PC163 
##  0.187922753 -0.018870079  0.403930451  0.035022196 -0.339648142  0.251923948

Test Set Evaluation (LARS)

# Evaluate the caret-trained LARS model on the held-out test set.
# Guarded by the user parameter `algo.LARS.caret` so the report can
# skip this algorithm entirely when it was not trained.
# NOTE(review): `t` is presumably a transformation object set earlier in
# the script (e.g. log / bestNormalize); it shadows base::t — confirm.
if (algo.LARS.caret == TRUE) {
  test.model(
    model.LARS.caret,           # fitted caret model for the LARS algorithm
    data.test,                  # held-out test data
    method = 'lars',
    subopt = NULL,
    formula = formula,
    feature.names = feature.names,
    label.names = label.names,
    draw.limits = TRUE,         # overlay prediction limits in the plot
    transformation = t          # back-transform predictions to original scale
  )
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   106.5   123.2   126.5   125.7   129.0   136.2 
## [1] "lars  Test MSE: 92.1350105563208"
## [1] "lars  Test RMSE: 9.59869837823446"
## [1] "lars  Test MSE (Org Scale): 92.1350105563208"
## [1] "lars  Test RMSE (Org Scale): 9.59869837823446"

Session Info

# Record R version, platform, locale, and all attached/loaded package
# versions so this analysis run is reproducible from the rendered report.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2             knitr_1.20                 htmltools_0.3.6            reshape2_1.4.3            
##  [5] lars_1.2                   doParallel_1.0.14          iterators_1.0.10           caret_6.0-81              
##  [9] leaps_3.0                  ggforce_0.1.3              rlist_0.4.6.1              car_3.0-2                 
## [13] carData_3.0-2              bestNormalize_1.3.0        scales_1.0.0               onewaytests_2.0           
## [17] caTools_1.17.1.1           mosaic_1.5.0               mosaicData_0.17.0          ggformula_0.9.1           
## [21] ggstance_0.3.1             lattice_0.20-35            DT_0.5                     ggiraph_0.6.0             
## [25] investr_1.4.0              glmnet_2.0-16              foreach_1.4.4              Matrix_1.2-14             
## [29] MASS_7.3-50                PerformanceAnalytics_1.5.2 xts_0.11-2                 zoo_1.8-4                 
## [33] forcats_0.3.0              stringr_1.3.1              dplyr_0.7.8                purrr_0.2.5               
## [37] readr_1.3.1                tidyr_0.8.2                tibble_1.4.2               ggplot2_3.1.0             
## [41] tidyverse_1.2.1            usdm_1.1-18                raster_2.8-4               sp_1.3-1                  
## [45] pacman_0.5.0              
## 
## loaded via a namespace (and not attached):
##  [1] readxl_1.2.0       backports_1.1.3    plyr_1.8.4         lazyeval_0.2.1     splines_3.5.1      mycor_0.1.1       
##  [7] crosstalk_1.0.0    leaflet_2.0.2      digest_0.6.18      magrittr_1.5       mosaicCore_0.6.0   openxlsx_4.1.0    
## [13] recipes_0.1.4      modelr_0.1.2       gower_0.1.2        colorspace_1.3-2   rvest_0.3.2        ggrepel_0.8.0     
## [19] haven_2.0.0        crayon_1.3.4       jsonlite_1.5       bindr_0.1.1        survival_2.42-3    glue_1.3.0        
## [25] registry_0.5       gtable_0.2.0       ppcor_1.1          ipred_0.9-8        abind_1.4-5        rngtools_1.3.1    
## [31] bibtex_0.4.2       Rcpp_1.0.0         xtable_1.8-3       units_0.6-2        foreign_0.8-70     stats4_3.5.1      
## [37] lava_1.6.4         prodlim_2018.04.18 htmlwidgets_1.3    httr_1.4.0         RColorBrewer_1.1-2 pkgconfig_2.0.2   
## [43] farver_1.1.0       nnet_7.3-12        labeling_0.3       tidyselect_0.2.5   rlang_0.3.1        later_0.7.5       
## [49] munsell_0.5.0      cellranger_1.1.0   tools_3.5.1        cli_1.0.1          generics_0.0.2     moments_0.14      
## [55] sjlabelled_1.0.17  broom_0.5.1        evaluate_0.12      ggdendro_0.1-20    yaml_2.2.0         ModelMetrics_1.2.2
## [61] zip_2.0.1          nlme_3.1-137       doRNG_1.7.1        mime_0.6           xml2_1.2.0         compiler_3.5.1    
## [67] rstudioapi_0.8     curl_3.2           tweenr_1.0.1       stringi_1.2.4      gdtools_0.1.7      pillar_1.3.1      
## [73] data.table_1.11.8  bitops_1.0-6       insight_0.1.2      httpuv_1.4.5       R6_2.3.0           promises_1.0.1    
## [79] gridExtra_2.3      rio_0.5.16         codetools_0.2-15   assertthat_0.2.0   pkgmaker_0.27      withr_2.1.2       
## [85] nortest_1.0-4      mgcv_1.8-24        hms_0.4.2          quadprog_1.5-5     grid_3.5.1         rpart_4.1-13      
## [91] timeDate_3043.102  class_7.3-14       rmarkdown_1.11     shiny_1.2.0        lubridate_1.7.4